From 4c9213887b31267c741a808d7e7e4cb681ffb936 Mon Sep 17 00:00:00 2001 From: Nathan Moinvaziri Date: Thu, 22 Aug 2019 14:47:08 -0700 Subject: [PATCH 001/293] Fixed compiler warning about converting from bool to BOOL (performance warning) --- src/os.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/os.c b/src/os.c index bcce5d7d..09aa8061 100644 --- a/src/os.c +++ b/src/os.c @@ -123,14 +123,14 @@ void _mi_os_init(void) { // Set "Lock pages in memory" permission in the group policy editor // HANDLE token = NULL; - ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); + ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token) != 0; if (ok) { TOKEN_PRIVILEGES tp; - ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); + ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid) != 0; if (ok) { tp.PrivilegeCount = 1; tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); + ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0) != 0; if (ok) { err = GetLastError(); ok = (err == ERROR_SUCCESS); From b7e506ad9d615694ca7d58783d3c63a8cea5741c Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 3 Sep 2019 19:33:38 -0700 Subject: [PATCH 002/293] fix for incorrect region count --- src/memory.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/memory.c b/src/memory.c index 222b87c2..0fe3594c 100644 --- a/src/memory.c +++ b/src/memory.c @@ -152,15 +152,12 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit else { // failed, another thread allocated just before us! // we assign it to a later slot instead (up to 4 tries). 
- // note: we don't need to increment the region count, this will happen on another allocation for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { - void* s = mi_atomic_read_ptr(®ions[idx+i].start); - if (s == NULL) { // quick test - if (mi_atomic_cas_ptr_strong(®ions[idx+i].start, start, NULL)) { - start = NULL; - break; - } - } + if (mi_atomic_cas_ptr_strong(®ions[idx+i].start, start, NULL)) { + mi_atomic_increment(®ions_count); + start = NULL; + break; + } } if (start != NULL) { // free it if we didn't succeed to save it to some other region From e302737830bfbf9ed7308ad9eb0cd8594ce64f56 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 4 Sep 2019 12:14:59 -0700 Subject: [PATCH 003/293] reserve huge pages returns actual number of pages reserved --- include/mimalloc.h | 2 +- src/init.c | 2 +- src/os.c | 21 +++++++++++++-------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 4e291c65..78921a98 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -195,7 +195,7 @@ typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg); mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; // ------------------------------------------------------ // Convenience diff --git a/src/init.c b/src/init.c index 6748e8f1..a0ed491a 100644 --- a/src/init.c +++ b/src/init.c @@ -429,7 +429,7 @@ static void mi_process_load(void) { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) - mi_reserve_huge_os_pages(pages, 
max_secs); + mi_reserve_huge_os_pages(pages, max_secs, NULL); } } diff --git a/src/os.c b/src/os.c index f44b7fbe..2b7ae685 100644 --- a/src/os.c +++ b/src/os.c @@ -788,14 +788,17 @@ static void mi_os_free_huge_reserved() { */ #if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) -int mi_reserve_huge_os_pages(size_t pages, size_t max_secs) { - return -2; // cannot allocate +int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + UNUSED(pages); UNUSED(max_secs); + if (pages_reserved != NULL) *pages_reserved = 0; + return ENOMEM; // cannot allocate } #else -int mi_reserve_huge_os_pages( size_t pages, double max_secs ) mi_attr_noexcept +int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept { - if (max_secs==0) return -1; // timeout - if (pages==0) return 0; // ok + if (pages_reserved != NULL) *pages_reserved = 0; + if (max_secs==0) return ETIMEDOUT; // timeout + if (pages==0) return 0; // ok if (!mi_atomic_cas_ptr_strong(&os_huge_reserved.start,(void*)1,NULL)) return -2; // already reserved // Allocate one page at the time but try to place them contiguously @@ -804,7 +807,7 @@ int mi_reserve_huge_os_pages( size_t pages, double max_secs ) mi_attr_noexcept uint8_t* start = (uint8_t*)((uintptr_t)16 << 40); // 16TiB virtual start address uint8_t* addr = start; // current top of the allocations for (size_t page = 0; page < pages; page++, addr += MI_HUGE_OS_PAGE_SIZE ) { - // allocate lorgu pages + // allocate a page void* p = NULL; #ifdef _WIN32 p = mi_win_virtual_alloc(addr, MI_HUGE_OS_PAGE_SIZE, 0, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE, true); @@ -816,6 +819,7 @@ int mi_reserve_huge_os_pages( size_t pages, double max_secs ) mi_attr_noexcept // Did we succeed at a contiguous address? 
if (p != addr) { + // no success, issue a warning and return with an error if (p != NULL) { _mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr); _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main ); @@ -828,7 +832,7 @@ int mi_reserve_huge_os_pages( size_t pages, double max_secs ) mi_attr_noexcept #endif _mi_warning_message("could not allocate huge page %zu at 0x%p, error: %i\n", page, addr, err); } - return -2; + return ENOMEM; } // success, record it if (page==0) { @@ -840,7 +844,8 @@ int mi_reserve_huge_os_pages( size_t pages, double max_secs ) mi_attr_noexcept } _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); - + if (pages_reserved != NULL) { *pages_reserved = page + 1; }; + // check for timeout double elapsed = _mi_clock_end(start_t); if (elapsed > max_secs) return (-1); // timeout From f280f14e3115b0f5ff56fc60a959ed8e1295cc85 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 13 Sep 2019 12:16:40 -0700 Subject: [PATCH 004/293] roll back commit 3d8c331 and start region search from last idx per thread --- src/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index 0fe3594c..4d3dfe9c 100644 --- a/src/memory.c +++ b/src/memory.c @@ -315,7 +315,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t* // find a range of free blocks void* p = NULL; size_t count = mi_atomic_read(®ions_count); - size_t idx = 0; // tld->region_idx; // start index is per-thread to reduce contention + size_t idx = tld->region_idx; // start index is per-thread to reduce contention for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, &p, id, tld)) return NULL; // error From 7d018dc9e10e7d4a979f7b7199072d6ef30e05ff Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 11 Oct 2019 
17:03:09 -0700 Subject: [PATCH 005/293] add delayed output buffer --- src/options.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/src/options.c b/src/options.c index 09524cb4..5e631b8a 100644 --- a/src/options.c +++ b/src/options.c @@ -140,6 +140,46 @@ static void mi_out_stderr(const char* msg) { #endif } +// Since an output function can be registered earliest in the `main` +// function we also buffer output that happens earlier. When +// an output function is registered it is called immediately with +// the output up to that point. +#define MAX_OUT_BUF (8*1024) +static char out_buf[MAX_OUT_BUF+1]; +static _Atomic(uintptr_t) out_len; + +static void mi_out_buf(const char* msg) { + if (msg==NULL) return; + size_t n = strlen(msg); + if (n==0) return; + // claim + if (mi_atomic_read_relaxed(&out_len)>=MAX_OUT_BUF) return; + uintptr_t start = mi_atomic_addu(&out_len, n); + if (start >= MAX_OUT_BUF) return; + // check bound + if (start+n >= MAX_OUT_BUF) { + n = MAX_OUT_BUF-start-1; + } + memcpy(&out_buf[start], msg, n); +} + +static void mi_out_buf_contents(mi_output_fun* out) { + if (out==NULL) return; + // claim all + size_t count = mi_atomic_addu(&out_len, MAX_OUT_BUF); + // and output it + if (count>MAX_OUT_BUF) count = MAX_OUT_BUF; + out_buf[count] = 0; + out(out_buf); +} + +// The initial default output outputs to stderr and the delayed buffer. +static void mi_out_buf_stderr(const char* msg) { + mi_out_stderr(msg); + mi_out_buf(msg); +} + + // -------------------------------------------------------- // Default output handler // -------------------------------------------------------- @@ -151,11 +191,12 @@ static mi_output_fun* volatile mi_out_default; // = NULL static mi_output_fun* mi_out_get_default(void) { mi_output_fun* out = mi_out_default; - return (out == NULL ? &mi_out_stderr : out); + return (out == NULL ? 
&mi_out_buf_stderr : out); } void mi_register_output(mi_output_fun* out) mi_attr_noexcept { - mi_out_default = out; + mi_out_default = (out == NULL ? &mi_out_stderr : out); + if (out!=NULL) mi_out_buf_contents(out); } From 5e9b37dc4ec6bbc89c8643f65aecbc5a67484a8e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 11 Oct 2019 17:03:09 -0700 Subject: [PATCH 006/293] add delayed output buffer --- src/options.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/src/options.c b/src/options.c index 09524cb4..5e631b8a 100644 --- a/src/options.c +++ b/src/options.c @@ -140,6 +140,46 @@ static void mi_out_stderr(const char* msg) { #endif } +// Since an output function can be registered earliest in the `main` +// function we also buffer output that happens earlier. When +// an output function is registered it is called immediately with +// the output up to that point. +#define MAX_OUT_BUF (8*1024) +static char out_buf[MAX_OUT_BUF+1]; +static _Atomic(uintptr_t) out_len; + +static void mi_out_buf(const char* msg) { + if (msg==NULL) return; + size_t n = strlen(msg); + if (n==0) return; + // claim + if (mi_atomic_read_relaxed(&out_len)>=MAX_OUT_BUF) return; + uintptr_t start = mi_atomic_addu(&out_len, n); + if (start >= MAX_OUT_BUF) return; + // check bound + if (start+n >= MAX_OUT_BUF) { + n = MAX_OUT_BUF-start-1; + } + memcpy(&out_buf[start], msg, n); +} + +static void mi_out_buf_contents(mi_output_fun* out) { + if (out==NULL) return; + // claim all + size_t count = mi_atomic_addu(&out_len, MAX_OUT_BUF); + // and output it + if (count>MAX_OUT_BUF) count = MAX_OUT_BUF; + out_buf[count] = 0; + out(out_buf); +} + +// The initial default output outputs to stderr and the delayed buffer. 
+static void mi_out_buf_stderr(const char* msg) { + mi_out_stderr(msg); + mi_out_buf(msg); +} + + // -------------------------------------------------------- // Default output handler // -------------------------------------------------------- @@ -151,11 +191,12 @@ static mi_output_fun* volatile mi_out_default; // = NULL static mi_output_fun* mi_out_get_default(void) { mi_output_fun* out = mi_out_default; - return (out == NULL ? &mi_out_stderr : out); + return (out == NULL ? &mi_out_buf_stderr : out); } void mi_register_output(mi_output_fun* out) mi_attr_noexcept { - mi_out_default = out; + mi_out_default = (out == NULL ? &mi_out_stderr : out); + if (out!=NULL) mi_out_buf_contents(out); } From 480f7683a939da42453518ad63a1d7f3bf40ea89 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 12 Oct 2019 10:42:04 -0700 Subject: [PATCH 007/293] cleanup delayed output --- src/options.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/options.c b/src/options.c index 5e631b8a..3e10926c 100644 --- a/src/options.c +++ b/src/options.c @@ -150,10 +150,10 @@ static _Atomic(uintptr_t) out_len; static void mi_out_buf(const char* msg) { if (msg==NULL) return; + if (mi_atomic_read_relaxed(&out_len)>=MAX_OUT_BUF) return; size_t n = strlen(msg); if (n==0) return; - // claim - if (mi_atomic_read_relaxed(&out_len)>=MAX_OUT_BUF) return; + // claim space uintptr_t start = mi_atomic_addu(&out_len, n); if (start >= MAX_OUT_BUF) return; // check bound @@ -163,17 +163,17 @@ static void mi_out_buf(const char* msg) { memcpy(&out_buf[start], msg, n); } -static void mi_out_buf_contents(mi_output_fun* out) { +static void mi_out_buf_flush(mi_output_fun* out) { if (out==NULL) return; - // claim all + // claim all (no more output will be added after this point) size_t count = mi_atomic_addu(&out_len, MAX_OUT_BUF); - // and output it + // and output the current contents if (count>MAX_OUT_BUF) count = MAX_OUT_BUF; out_buf[count] = 0; out(out_buf); } -// The initial 
default output outputs to stderr and the delayed buffer. +// The initial default output, outputs to stderr and the delayed output buffer. static void mi_out_buf_stderr(const char* msg) { mi_out_stderr(msg); mi_out_buf(msg); @@ -195,13 +195,13 @@ static mi_output_fun* mi_out_get_default(void) { } void mi_register_output(mi_output_fun* out) mi_attr_noexcept { - mi_out_default = (out == NULL ? &mi_out_stderr : out); - if (out!=NULL) mi_out_buf_contents(out); + mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer + if (out!=NULL) mi_out_buf_flush(out); // output the delayed output now } // -------------------------------------------------------- -// Messages +// Messages, all end up calling `_mi_fputs`. // -------------------------------------------------------- #define MAX_ERROR_COUNT (10) static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings From 7441aa42128bc33b06efeb5cb55f002e9a00be3d Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 12 Oct 2019 10:42:34 -0700 Subject: [PATCH 008/293] update redirection module to add additional checks --- bin/mimalloc-redirect.dll | Bin 43520 -> 44544 bytes bin/mimalloc-redirect.lib | Bin 2874 -> 2874 bytes bin/mimalloc-redirect32.dll | Bin 31744 -> 32768 bytes bin/mimalloc-redirect32.lib | Bin 2928 -> 2928 bytes 4 files changed, 0 insertions(+), 0 deletions(-) diff --git a/bin/mimalloc-redirect.dll b/bin/mimalloc-redirect.dll index 45e0fb4843752d1ecaa1cac4c18a485e4caabf56..98949605fdb7aaeedca3807c97b347638d1a4a98 100644 GIT binary patch delta 5232 zcmbtXeOOc189(Pjga9EIA&BVk1|!Cg(Ud3##WpCkmo}Dw)KQ;WK|#SUqG0_N5OG>$ z1&`XzX&-d0>1;P*(^_iP#?=pWcExI2yQjKnze1^fs9QHTZL{CGxlo^N|Ls1{@4UbF zJ@5OT_k7)()L$0rKM`89X?NSy>2+<f_8KqmI;GV}$h%en03Uz7F;Rgl*d)P8z zgccPjgSE5W&1&%@bs6s?e{38XJ|AZBw-|)w|V|#bTew$!z9gp^fYs7tXbQhCq!=6J#&JP zGvSd$Zk)Oil+-45Vl*Nz_7S;7R}LrriE!HFTN!Tk^+w1onnLd}%@yice+*s{AvKw> 
zE#Av>#hB#gVv{6?U6h;8%gu&4aM2&pZjBM;W{ney9Q4WCG!KFG?nTr@B}$ihjQJyW z2nXI`wUHZ5sc*q=T$kK@Qr?zpvdGQ3CYPKAR+A<-OeN?g5=+$1jD7j&fMbhpH(RA0 zk^J-?UPiIW>}b}kK(}MuAM%^K9W9zU%%dGar?PJCb9ujgGgSBrp*IUf?EV#x(LD#N z{z$PYa%(~)s@)1TRP9F2S~S%mHy1R%S}e|*z?v5)B_*mYW?Wdm!4o zEcy$IwSHW8A3e^RbP~PfJ*Yb^&^ES2lxQVu5t|16@G39Hn-ur9Kn>n2@$od?_^eV; zYDfRRoLY_19%Q2765l=HyBRDe@qw6qvD~dIunUs;lMSprF(Il_HI~M*6Nzb2HdVF6 zvTqW{#e}Kq9i0*~jM;}IM0G*od3-wd)DUCLK2_bteGaD>m@82qvs5)!a3de9dx%~) zRW(k8%wX4(6Sy@-r&=}ruDq-(F`+TKW0X36qpH7#M7m+DJt-yTDb+YDWK3lKq@gh* zRpYRGJ+I-s^NK%&M0lbS{=913ts1@ULklA^Ucym6lWQ{4#$zUatf{_9i#N=IoX&76 zx9l{vV)b##C)5%WnDuEe*yLl|qldBwjfpz7E1q{5n9(?fH5>JH+}h-;9ePjjjT(5o zegnq<9Z_d6f>5b$Jgfy;)T**5uTva+O+(mx`rmeZAR8CTxeEOy&T=xI0jq&sq zNRKNRH3V7k;!v|p=+82NSyGc{^V%A3Ta}?~12x#VJ8H0nYRKlQQbTrsboN%w+J^rg zIE4><<+`#ci&B$WgK_Bpn*Plda9a(x{^{oq44tbEof{fj9vE8gA6mxrN8)yMA_>1m zrQpX@UJ2f%au@jJ6{JpaS}mX5Y^vwU@W?I2JW8i`pp1R~&}A%@{xy>y$#cMz7wshtp{Hj(>|gs)C+ z-fr?BO&|h(fz4>{O%-uQQ-=VmKcZO;iGU%prHbK=-u0JiXU9{)*oP^ zDI@-?YZxDuSmbRy&A1oQe>i0rbD1Kv-{lixJr`)b#_CN&#(x0qO+KT*QXbQI%KXj+ z<&hh|He*h&^M3x-3e331-Z5D-Eiz7(a!ed84wH?)$Sl_Pt>=P<&yEvlKI~7w<)F1KT>}nP5j9nyh!>P*9 z^fH+}T4yieJ*5M0`%2DixV3ibXhA6``RWhBASQUopp;`qIcBmxo-e)~YsqS(Cos`^ zAG4%Y3WpNdp0sqKK7pN2n=U+&!1SXx(v$2LqhAps<5{+OT;lb?_%J9FMRs#&qFG1T z2J>>EcrfcT|3%P#%r1`kvPkCZqTE@2$nYh?l!Asqo%a?I$xoVvtPIca^%n7YSmZ{Z zq|AW#*l7Ekb_at! 
znjnj|=CSs0yrDIN>+pS2m)rL0lggw5TDU2j%4;^0l`-7Ksq9EEjyC_W@h>-@9TXfv zoC&_oPShLC$9e5NBxo_{!Ffh0unq=g-)kS`juIRN?%4d^1Hm9aVK#17W+nxCzDG2| zN&=Y!VTS&&PnIA|13Ok&@KEO0=6IdfO*DDUyUII}uly>MU80ojFqs2J_Vt8^Q?&oW zK-gv}7o$l>@#za4QMoxXaQ6Z$oH);La|aC1PrybMLF@!cQbDQL)L+r(+IW%j%lPr;{-5wYZ zduSesd^2&=`J7vH>F@}A*%ewgNuAk5=E|xO26wT~vqnbLR>0%tS*D*nMmT$^?A=EbGW!30@b}q5Ah!%C0KwDW z7XzLEyn?=03cQDB#L|SKnYlA>Wfg9jRrJ~;+uwTK^S2$>8oebmmW9!Y?5uOFPD_b) z6H>%d5_^FC-C0dHd2_bT67qw1c=BXyId+$5R0J^kT|k7rN`xIq0NQ{WzykX;pctqG z)_K35`B(btEMGJg*~zjFTF;ix>!2QHoZmtH>>}EF)==I-o$US!ZoiIJWNiz0Z_dIF zYG>b~b+P@GytlBbgZ44HsDoPAVYDu9+2SB&uIg6Sw^V|2+%oQV5^XD6Ufn@`UZblz z3}dv_@LV~yZf*A{QqQMkVc$X{X(6qY?O*BEdelDATln-`ilGj#;$da0c~Nm|+*<#b zkh{-&at$0T%(#x%esP_fwjJLJjX1VHrN1Fb>*SgeM>p{j4{y4sI9Ar^*0!o1tt@V{ zTjL*yb~YtxrE#HX4_n^k*1A-!+iTq7razsLKO@KCD40IiS(IN;lwX`bJ8!O|ASZWj zK~ZjA5h3?aDK2_EZ^{#SGp7_h<~{xM`B8k<<{hHfzGD|nlB#N?s%2HRRb@-6)>f2D zWeaL6R!F02*x;RMMrlUbk|ovir4<$BRVymy*H%?84VHlCgcjYLq0wXzrt=qX&*+&KLJ0uI2`}T0UH8JA-#YE z-VLcYA`o0k#ZOGY0$vY!0C0hOAU^_DgZm*PMxq~Fgv)CzkpsLHvRX!t3h(m56k&4Zx0pSi^{U*f%44eh zlPUwM48IlfPrZfr?1I5veIb>%*}SW-&6xOqCiPbCyDn%bB^xNdr>WHI+TR-aZwYr1 AN&o-= delta 4875 zcmbtXeNuU+Iq?>Kwv7kp~ZFDEh(9iIA^{bR14q<-2jTcNjO~{+- zS;^Qg9}`(NG5prl`a6XPRW8J#<7vhR9fdv>DC* z3^ezVdVth#qS5r_ppi$Tc@@?7Ka!1R8wTos3W@q}5KBkiek-XPb!e8PqPaW{&3{RK zJq68`2sAS(b59tW7f?WdB-!3bHK6?yr4GQ6cZ9wzT;vXI?(`@AfMshjZg`ck)1hHZ z+B&lel+`31$4E?CDTAMcK?3L zWiU%EC1z0yxgfQim0IHG!6m9&ztdonTC_GOQot+i(&m8;>_=3(5@pOd$_I7rn)V}n zTi9lE%n|ra=#g4ZNV|&6R;i`P?3AX1t;>hPfRY#X`JKv#CNnY_R zl~H206t`%Xp}Tm(yLY$r7Q3|bxL2PdOyoZO@8|4$DOmVA&A@aNv3(!K7r^ z+Zh*zYU`i|tF0pD(mot?b3mijI*Emh<-Os1k}qBl1gKJsiH1g(q?Tf{&7UA|j`&i* zVpkX^3q5?VQ4~ItyN&N@geG2Z5`{Ycys3G_w+E;i*%JMZMo5?av9Us2^ixVjF@GG| zm{WBp<;#W90=H@>YqazE!i3umE+aYh_?sd5U~)Y7B*cZ6sz$q!f0&RFZc$ay$bU(g zVEDlh3>kn#$S7`4j0-=m8a)PHm6&R9scJL%oEQ_&8xx`o530uBk&(voB}N%1s>XE4 z9DXf1j;w~8ICE6ikC4M<^{7T~gi;4r)s~>oDDFubW0b>H5n8jiX5=l2 zzgv&kx31ufpo4-Ah^sKVhI3dtQc6f%A+ExAFvvWb>Mn~$_~sO{i| 
zDlY|>RPF#TQF*0=VV#sma+Oeo@!et+WxpJ}7m|eZ5>w`0G(2t*`QySAVGuJM%&aCy zay=BEL`iXdB)?9H*WR%QxvIdUy1l>p>P~BKLx&L6txbT$(k9~h6$8p@j4ivBoeso@ zK*tznsp%A*gp)ivzKdJab7HkV)P_;4(k@EzpLXT2kK(QAI(_`TjOCv3doFQzdScd( z_b~PbRqc0OLd@1vmXqg{pS1O=MRLVoq<(tr``29JpQq>MS!gwsoh^1@0Og^O`*H4> zKzwKyW0--6*@~>bL~7Y)u0v*8=6qh9kv0+e=(#aHo|XW{;uaeoMJ1>bcDrHLuT;_T@gCfmHIcg#Zt&zwF z_2YT#xJu2}vHZrkNg8h~A8(nh*%!;JEt>^_e``6akz)A%%n1ppF?h0~q6TaeoS;pLIJTQl6y-C}zqba+LQZ1G&{qI9Z1P7;7+4eyE z?Jvnm6UzkQFY<;--wK++i##PegST22=r*lIjOQZ%(mG8uV#uEKC%pylDz44nWx&s~Ppm>3ffsOhq zgAtc*1_A@)k>qc~NmjuZmpGoTyNFZfqS2vFELwI<-6AA!`$%yO&G8OAj){~i z6QM$)`G51~j`{iLK!El~EW)VbpChruG#$K~!u-0Ac-gdN@vk&7R;%*}{a**S-@Z%x zY?ItKtxdT9^G7kJ?mVW580A$a;?3UU-+L~&c9wy|w0ag+YeI$R$=i=E{5eRc31XI1 z$$8plMRggQZUh4U4*2korWbNk!5EG19JdyXE|)5UD`@xM{6Lw}IIPHG)hPMT!U#i- zV(9UwsV`b_pdQx3;n)+u#?BayeKHuER>?gDaTamdx%pS;vBS=bg3jTU{Az({y}mM7 zmCZjFc4c2{6KDf*1!;aR;}}HxbreJk%-3pDaOkG;)*0(HIcK@HaICHzIsC@6d~#u? z=F>BLY2o}yHTVM&NPkRe>y&nb($*{OW~FUZ+AV1DzK@DV)-XOeFRndgY|hZ1YkJlo zIV&my=7Hch)Q2}Q!(C@b(RXy7d`%N6#4TD}w0OzA@1=AVOkHrOYV!QA{#n@~moBIZ z5pwzai^gjs1s1u5|FWoBI3TZGJYRD+u2F!JhGUZ0L`MIM34Iv#Dvb<#IsW%)fjD3{ zPzBH$(^eG$rNAoLxa0?+J;d}{sW(EvpNZvNf|IW;?-JZRwW3St);)aQt!f5U4oAbtGfh|A4BVq%hm)0q$sN<*Jvj>op5$> zM|EraI#KY*sq3mk(BD=|{j(nH)O*ueBTY)=*Psomnw@t%-l=!18j}~+JuG0PV^2_6 z*#@d9dSj=)H!~RQkxy)dgUC}iQRx>pbqcM>_f(rTjGG#gwD_iJ6!5vixwj!n@D6o3 z8K~;0}jxC*9Hhw6jZ)=j?Mw${QY@-s7ZM&s7 zUe?^HZ&W=RdGz*9ZSQcjeMgesG$9!6=4*F!>K&@)l~Z?i3i9<`j&SbSW8%dJ!er;3 zy+Sy~QGfiqy@{jOS5&N7FV<9xtE_N^N&#~D)2D<6 z5uZ0J01G~idLifHy4A@-~(r)87lzt5nzXW0T98xkO2`k za8nw-2mvd&6S5s}g1aH#2kO9kA%6lI!P!{GvVfi7HprDgE4XuP0|=C7ptvF51>E4h zklzC@f-`&trU4#sE98B^ac~FZZomufhP(jufcqc~>Bs>tLM{aQ$RDx|@R2{{alqdI z#S3x;7zD>@XQS}}dL7&fSq89j3=a7f5C+c1VOszbxCnVC>EL$AdO!qkh3p0_;9kh9 zfEAoR<+Ff%aGZ12@BkVc6dY{U0Z8CD^6V494vr(v44HUZfQyg|0SCAPayL*3?uL95 zaDwChvG0L8a9jwM1vG-=IL?ndmzsOrQp4g8?&*xloK){2m3z= z#g>DLa*-TdoWj_1Ko7VVa$X(+DFE_|Y1lLJ$3OUkKp*))9-oett-y>6utGo*xD#^3 
z3``2#3OTb7xxpJDWxxvVeE{UMnOJIax}C9@J5V*a9dfJ<1A#jrm(IeTfx98Q?!-Xg zY&K(W17YBUkg0cJ`@o%$3+7G1_sY742$*Or>_e39|E0YuNt)n_rdx diff --git a/bin/mimalloc-redirect.lib b/bin/mimalloc-redirect.lib index 149dabb71bfbc6b15fecd389ebc5e17af10e40a7..1e22ef12aaec87c9544724a5c766ddbc9bd90ac5 100644 GIT binary patch delta 62 zcmdlbwo7coH5SHMlNs4HCjV!#VVt$uhV=*|Sik^C+OSG*_GOob2&hc%<=}t{Zsh1> F0swn<63qYr delta 62 zcmdlbwo7coH5SH6lNs4HCjV!#VVtzthV=*|Sik^C+OSG*_GOob2&hc%<=}t{Zsh1> F0swVZ5}g15 diff --git a/bin/mimalloc-redirect32.dll b/bin/mimalloc-redirect32.dll index 0b5f515614df59b6ffb87800d0fba95885910459..b19e42cd2a9106bbf80260c29b32ea9f9cfc8a2e 100644 GIT binary patch delta 4050 zcma(UZBSEJ_P$3#5(wl4MWP@C1r!BKAOT`PP9?Tl6g98*tXEFNW~Vf*u_9ztvIcg#v=9gbudMIL?} zh}E!Gz$(VsY=R7QkO)>Yev(blc;P7&*7L9)`V}HnxT?9X@!1^+?F#`0xTIQpJs+@U zrYA#sNEF%{Dn;l{`+NWg76LdE0iYSI-7zu%1TlaX8Gu*g0eln-KtyN>DgYJa>1ql9 zFR`+dX#6(@KEfI{MD8d-9A4&KvQL~1FweJBN4O_66`Le$*tnDUprmdQ5n0Jkb@J0L zCc>x!lni_gVRh44TpMx9-R>#|IRSbSaltP>)~ zZPGd&5k(MXGdqz=W9l+Wom3-W#_>7RN$pYQ+(|%^3vh;Pr^5tK1M4h7fo(35&z9GHSuk=zSU1018Dyxz}a5^da;U8Gh~| z^oR`iJcQb1xc?z^y9{4?2+fz_yHN|UB!&8U>db5?eN-U7x*b6Jt2Vy*pb6Ng?0mVk|2aSN!VS+ zNvH8Xc)s6pS~+TKuhw0p1S@>K^pkzG!+F zUO0?7Q@4mI1vNEy7}tVPT_ZLf_G>h%D?tZW0uHV?Ei)JR)6<}Sfgcq8mTc$Z=;qC! 
z+i)vU>E=(HtZfb8Wcbcp4UahNBZwUPXwVaxXW(g+db&sul0P|PIQEOsj&SU=aD|Uq zi@~&uA2A7u0c-Cb`~XHkK7q(KUol{u>G+FH?y^YD!m{;LL?o<(-N}Oueilwca$bA@ zq#4CP1ld7+K~O(}G@n13nj998_ZP6MBl6+wQXcs%Y= z#*3FNPGR5XaLM8oY!8QDUc8NsLqXD*f)F31f$i}dS+xR>#P4U{ z`yQJTUVnQ21%8HR%+El`(o%nP9L;vpVV^{ML&*r+Ez0yxTa)%EEen1MCVVm;Pf(^mux%IL=?BNwC-ywj~;gm00lI#Dhy#NZz^`x03F_J58fd!&-&BI zl%G^i4$X2cBK2@l1X7ez5k%*EB&=kC0cR%jhc=K7lHLHzG~HPzyra0_FmZL1aVVq_8s8fYmD_*u<~#BP(NC&(S;L z%z`K&4Ly`(y$D3>wcs`V<0~4LNeKJ-ojdWo^%h|pxl-ZF0GldK5k(*2kZb>AQ1Q}_ z)B<$$M7|Z#+u%&XAz;@*#8i_5s#Qf;q>fsGn!OJ+Ne0=pQ_`(!B1lHs)_`Cj$7m7{ z%|8S90!MjpfisYF0rYeXZc8gl?z$Q{Wy?(GI>L19k)z%7$sj2enXQM3=Eke|PTEp- z-Blc!UO>9ZpA6r!0HqX?N##%GD4b8BibB%Up7!*3CHRW;yuGH5!K3WL%-|O_yvxx} zSf(;q@U=~TL7p6*;X@Uj1Kv5_GsMO-c=Osc*&YrdJFI$7=h`nBZsQwoKBrs$*<-zZ zj!TnYgna6$U9XK`n($py0lsEZ!MD*}Q#q4Cac8NDG2x@7<&2x+xn>n}5SND-WC{?3`>Uu}ag|(L2uAw|0rvr>2CNORGI2FLc`4mg zEf=AH+hTmO8iW8b5!Z)mNcPH_Uxntb0ka@ejfZN=L`^}}>Zj!5gMc*Q=bkDPd*P`K z(kg~RDaY0wmFdQf#WBp5Y zwd$IuZFSA+#W|*&ik#o%*mC#f9?Kohm2B0Y)t@!wU;I4^ncQStN%&=`RX)7rXkm`#!zH< P%wRHXF?h!Is6+k-%YEi5 delta 3652 zcma(UYfxKN`rJ#>gc3-2l~@r11zJETkQZs8pzST_klheWX>DntB)ci4mO^m5BM3C5 z0+C(lai`tU)v(rLXLY3$r*@~04O$D1I>XlP6jzks;?6}_C+^^yv23&7cVo0V`(y9S z`Of*?kMo^#@9pkpy93Nn2{S!hy0xnz>fPncrk6&;rsM4WA~&pu)9#JSci4_Pfbqg6016l-^lwaVb~&u> zdddkY&$l)>8i2N8Gk|(rz}Qan+0;lktVA##qdNctr(_k3R{A8nHl{TKmR=YraMW{_ zH8UqgtGfOzA!Ol50)S>EfYE3G`$21=lmRd;2GAG@;A%2}U#qPSbd=U}K#3p;Z;!E8fVbKWcmer0`uESK$2@{dE`N*_nTnQc}*0GtlDLfnITsb|5MLf zk$nuKqE%5@DOUn0Bkku_oSiGgk?P_CCYMnL(ko{Em}e*|iDjprqUWN!SA~M{Y>auE zjly-<-CRx?RmGm9Q?V5^DgHHjJua5sicgMdV)*T_)ewP+FO^aze$DDIdY_c0#eI+d zM?xO}6na1q{#ZgA?n19fXwO~f?<92OF0@lZ$H8(~w@%`ji+^3TVH;DFKJ zXAH_OSc1Jb9NLRWIyeda_DnQd2AVSL!;O|Sf+cCM*8!H6P#P^Q!3?1-k@iUQ1ji$W zWIp78hfVG^w-x-c5&H)>447k2AINoS`I z+)i*aVYL}Sjhp%X6tv+4=e-6VCb(-ig=un0nBF5#B5?|J294fodv_Cru}=sC`sb#z z^CrOzr^BueOrxmFGTm4PqNv?~P zKxX6@5^;ulu5p&4AaO*yIGHJ!hzN8F^s4K#x;JHb7Yx$!HS6Ab 
zPk1b@B_Y@*!cXvYFgb-C<>{+1J;l@alQXhcpix3u1%$qDB26TiD034R)d}{qc{-%}jX^cY%|jT{YABPC!4fuEr+DE!q-a+i zhk8FTkHv?3fgiN^;7;JNq;C2}+JfY-w;>6>+Ve?z4I^2c6Iu-J6D2ie6iNzE`K>wH zmysiB`i{4r`Z8XPvI7tA4gQdip3dCHcKGNgnQ2iiNQR4tzdcJ=XH~JRkM7J$iRwTy zY$bnumiA;7voFkgE@iO{vyon1Z;%xCf}g<0^v?Q2UFs&eJr#MY=yIP znY2q?7HPl5zZeM{0k^eKVn4LebuI{fCpFu<(Nc3r$vy-KUT?f3M1Yp-eaDfDyJvq6 z1-FLipv}0rB`Bh-x)`~98y=)N+m$k;2T+PYVtqrwscuK(xEV+w^vbW8&9-_qteo-2Mu( zjf!gi4|geEL`5%haNC~_E6#l_*oODaVyS&nu&Xfft-!WHl!D7?)bt?>`=}N2X5J7Ilwa`TW?(!*&H>=_{#)o(1|@`?%jlRWiPq>* zTfmDSQ8L}M`jJ}3E8slfgff;|#wXyXElSWl4|q9H%V^~eC0K6QQ43m#2LN}|*jm&- zZz-qeYX1!Q_|6JCwo?I;<2#Xqc>OK~Ge(c>s-?%6?AByf>!RUr2IY^Gla&53q-WMUIyRO-y)|084DwRK>T5(vOy*;0JB5%d_x6w z1Ev711+W^i307kQ-LxN7KmsklydQ)higI**vJtb7ZTwMiV~t=IWK49tu|`xMin)^3 zTV>*IKJcNAFR=Q$t~_i);p|3lF;+1e8!h8IjH15KRHcv^mO zTGXX3v=%lM9w~Gd4i)~hC|D#>%hU<#WOcgwSL(0Sk(x?PwdPUH0nK5}lbX*o|I*yn zFxu5xxi(RI$}ncQXVdDE`I65|J}4zB_>qBNMm3*m?$`EcztDcAUC~D9R_oU4s&sWa zhpt0+LPvF0y+i+u{&oG=`cuVEZ1P(+N0cO&oZoV#j1<717#P@CaJFE5k*CO8G*c9z zR;g>X&Dy7R7jz%#w(EE6>-Alr{#*S;{rmbG`rG<@i$5){GMEiJ4SNk%LzAJ!@VJ2x LkFP@&0p0LlA}&@o diff --git a/bin/mimalloc-redirect32.lib b/bin/mimalloc-redirect32.lib index 2bc4b0bc66f9eefadcb14beee53034bf76e3797d..c99aa32ce499240310b8ff1274d1690c85ec9a96 100644 GIT binary patch delta 62 zcmew$_CajJHx|ZOlQr2jCd;$hG0xf?$9jqpEMNd6<5;CPm$J)31Y{ Date: Thu, 17 Oct 2019 09:20:57 -0700 Subject: [PATCH 009/293] updated redirection module on windows --- bin/mimalloc-redirect.dll | Bin 44544 -> 46592 bytes bin/mimalloc-redirect32.dll | Bin 32768 -> 33792 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/bin/mimalloc-redirect.dll b/bin/mimalloc-redirect.dll index 98949605fdb7aaeedca3807c97b347638d1a4a98..ec718f91c10dfe4c47b83739df2764fd25ad8581 100644 GIT binary patch delta 4901 zcmbtYeN;1hSh9Px7C7D9~lYkpe-45)8x<9!8AYl{;DzbZqc#am9l z1W-gbTr{TXs&NW z^Ij5~*LR_DigceUrywU6yze12i5fJY3TS@28jYX1_$8A~BGTOlnCxxZrTVq{&EyED 
zrZwu^{ZhAjWC%0!q>ScqWLBjiQv3cZpv0LqgO*5jAtH6BoP$!@dn&FY8vl{{)YI#Mho zb*XE>7W!cItZbF?*SQ@1I`OrH4|E*u(6s6w(?N4e?J-^60nrq12Za4>OQaU!4MYe zd#4GB9Hs?1#jA{r-bb5q%2p*Z`W6l46zg-+rc3oX9ig<2bpFrHXIh|mlxFLi*Z+Oz zpASFIhCid@y0TUGG3ugAy5cq+D)RX$gvTmb!0*Jl4g9xpZU_G+&Rf9MFqgW{>D?^t zo9FZhCO=a55#}Rh3CAv%!#L5UlFO2N7cn`Z1vms7XgTK z9h;u6BjikxJb$MTKolutdfl1h@c0fZV5AfRcrAz1a^gywRY$Ro+`U=Ziarcv0?C#na&h_7JDWn0Fo|<>%@uxqbr*`dYtYaE_Xkb4yfwOQz zWZ~mjENwc3tyiYps@*9^nc^QIFUQ=COe!yl5cy^a#;~f046j&aE?bo_t6xE07+T;1 z9E~O&pqAaK4~VQ3?3#z6NuC{XB?h>=Q}l5mFxuVcL2O=~b;l zDU--^Wey_4(310YsSxPM!Y2z)&tg|`H?a+IVJRNOSQs8eco4~%9Ogm%)>{JWvk54@J6Y6YKPo(xexi!1kKE)DQmY2vZb(eA?TN$hvYuIjNGfHNfax9c+ zucVQMzehCWQ9)eUaqSi@ab>LNbx9hSenQ{dqu+PsqgV`CJa;N*)I^*+mdmo4t+7;eD`H(KT?TUcn?~gH;)k)=47*ttvg7K`WUPg< zn(Ttpqblt7(o?0XDF~WzsC6}`6itI+jGDXD$00(c%*1(uokjVGEa0AHr-S`LXIP77 z`AFeFJgmr?vzw$L=u@lNCm0=hVlAG%MWc+r(^H! zrai@5QTTU@v(SE4T)-F3Qgumg;<@9nO_`-TO7i*7e@34xdAh)fUyQN5lXBZ4x8IlB zGjjW~+`8rV6}0$~lg1*X(dbiadlNPmF8|_ZyYv{|(n`FCK=3S&tpUfMG%}8{@k7C{ z`BZN0GlxqL?>uy}<7C5*gWUz&PF}y5s1H^gRP#wk@dHW45!KOP&7o@?ca)|ceq5cx zk(5`0PakgNgEyZ2T?!3VxqA}?YO2OPQZ3Ucs9=?;*DBIEg_>(b>aJnZ-eV#)9FxZb zpz{i~*hJc6lcm~Pk(z2{8Ul?f)P7u~-s7@Vs1vEVPNs8pVsE|7-1Q<2DAdp(QcHs@ zoo@*ACS=i`Mj1>^GDH>d+U1`41cP3O+zBXPX_k9hrwkzl>@K+{oRncs0rx4nXK0aO zUV)y|a?kXA8KMe!Uy^&~GYooL5v{G}JaXBiKE5j;8v0;b>)5 z9XvRkwFq`vsI=SbYmtlW)7qRv$cxBQ&f~?SmCd-N= za6z|V@?g3MCCq)h=YI%WKvV`q2e^`OvQ}6w$C|CwaT0YYo!BuI0RJ3X|oDdO$`l|jnz!k`e;7$U1hUY)v;pLG&uxkrGw2iubI%c ziw|99Kr)?8|NY-<|jjt|CMzB%~2<1$Ymz ztO&j_OE)k9->QrFlWqb|?8Ezjwqx5*P`M*xE+zqb;MIqlPi#m}$&Bwm3${ zHRD<1q4Dr|=S0thd9q?MYf6}!pNdRHrwG1x{j4xvzc=8W@rJ#2UyIM}>-6>bjQys5 zso&gR(N6|61KNSC0bw9KFgMUMcxli(7#OS=vJBaVT84ySao8|y8jcJ{hZl$2$TiKi zz_pocEq=GZ(|^ft8j(iKBbE`(sCHBsog2M98X3Jb<{b-+S;lSSE#u;ZVZu0JLXDym zixXs0GpR+DLX+XiIn>HM)j4%(iY16$1(3XEZ-v)_ifMgWJ^?km?VI;Sd{NZR+wboW lWcSaYc(wuifP26&XdE;RN+{ssAQ{pOX@|0ga2hTA{{iR7gy;YO delta 4310 zcmbsse^is@`u#pO7$|HiC~4Zq7&^jefIo(SO(XkC%&FvE_he^W9fF`N7>1=XLb{_x zflt>hyy;b(@jr%x>q%y;-}@`)z35`{&&`&yVl< 
z{XXyWz29Ez_pJ3(wtZ|KBX@2g(O-xtxoK>Yf;@Qtq(FEOzGJd7A42X|KOG^u z>}_S1GL#LMBQhhbMliric5aecitIQ-EW%+ZXYYu|_=FbXfalpeatP+8x*mjI{d6>J zt*N?ZD_S98&p;Ghg6Pcch~A1vv?mYI z&|ury(-X5SOUTio*PC5)T~)Tl|7~yARV!N>%C-yR;VdC0T!az&={iCf#xvPRB1`iw?t=@rTy9Pe!V* zWhe5nl0z?1uf#v2RDPmVC9@^Vuw)xPMoQjKxi)D{#LSFOv}b3aOESwpnlLC$=m0{n zN$f->v?#b}pA%qIu`%|SS7<7~dd8)g9UcPEr!tiY`s@gFk?#>b%=ip|@Nh^q*McEXnOB-oBmh5fU3v9r(7F*wHDcigXsm0>DYx~ ze4;yJME+EiuRwkz%A1kD9OWDM^9NK`ESLWHwo>YpC@;ZqFOP@AG>U<5-m5YYOTzhL zRS6xv2^r<6Jt|5_Jyq!GBPtq^r8ab3dK-3#GHERV%LcwJWe?41FHsqSS~0dX(1(KN zqG2S&GMk#FqiG^j+(nfS(pK z2?dcirRl5-C1+qp$~b5;R!OpY34zlK=0ST#`f@ea^&uWOCu!BeouA{Ho6oAxyd(bj z=I^QgIMvfLA-HZz56>=K6Z7?(1THOH&F0UBIg5_511T`PXq+8LhQsP?)|w0#)%UWG zB!fJ2KYMx>ypTD_%4R{~omq3foQZSaR-)3Dv=yt=c^~9m2&d$~PGJ9?X0~)Dgzo%; zjsFdYCJAQa z?K61$NRYeae>>+IQz(cLOYjtFE@X)Oz=!m?vRI|NrHGk2ZnQL{wz)w~a#FNIsue*Gtg#iO6DeaVw}NnMgnjE*zK|9Lc_vVk!|=kB++F z0c!Qhga$oS^cyXDsKO)Yp~7G)vRCv_-SXHddeGDfV(wO6jRouFt3k0mJ?lNx(pDR% zTCL~XiWv>v3N`gni^CUL!b{6l^Y*+O4%1Ipw0HxlpH7>k4yOGJt}kCBiT^k5$2x6l z?2UKAVX-l5!JI!U{pvvy>BM)K5aa4wrJtY(k7SO)y7k1e1-kO*-<%(uUK}h-HXSO> z)KD3Caj0-@dOD1E;SPzrSudq=$b>La3;T=aYj4?$LD-2;=~Su7?gAQ4eM|kDaOZA7 z^$I0!_utG zP2av-zH4v#?uz*4Mt%QreacsdzM6s#amzXHrgLt@d2ZBs;HESEZ&E=|?tI<)y5XowP`KEq`c^Z-)zSsZA!&PjDM&>{JyB`xl zFxkLRu7=TycxWnb1a7+;blYtZ+^$9&P*hk!sGw>aG*pTrN2L{XJJev@VFOu}8Y9F* zPE{j}R*CA#Dl2qWi)^6U3dWskuhdKZlwu?!)vsIkFtjc7JrV+E^O4Gyyn_{RlM z)Cw?$;HVRz!XkjCUVur2z-|F-PY9rI5FmAr0OJU*Cj~G+C4hFXaB!ahBM9yL1u!%U zz*z+dA_z?aG(0VUu2}%t0Rcu4I$H!VJ|lqoj{<}chHU~^TLs{s#q@SekKlL?)3;&z z=LP@d^F{`2?H8c3;~xKYZ$iu~K1PItG2cVz$QmXAj=kI@t2J$_tTk=5!1klcxCFXw z_}RGa=zXYu;pp61QOVAlZS~bAPMJ}sOn_5Im8kvVXl|yZqOy+LTDFU;HkEDTEH#|D z#$wuKsVuAJOtrN&wRPFty0U7#H-O^U+<7$g)(X?ss>)s4x$>G?&Qeyp-DKf1>Jp&w z*nB3zf6c3iIT*9)pi{!^2C4Hej32t3bC`1fN#}Kj(ZgSR(i<=4;PnVEBBMyZLogs8 zLOPg>;c>Xfk#5Z+WO5tcYLL!eiZbM_NPj{YL7rPih-W#*KwhCmJA@GOqe!JI@C8BM zi1aMN81mzk77(H_kw(H567o0#hsoNJE-WG>6?r|7T zM~G?_Ar;6Qk>=sF^T?Z#?zk7PS;z}W-!3LZhP>iFLOw=NP<{;|tL|4|5PWFApO6|J 
zgP8nH4=-R>R9vIoC1h`u9*NRZQTkq#2BTE+L&RSB170)BXO4s-j}#YDRj4Cv_Wv2w zU*Z0el`;(3$6$?0#&7n#C~3qSA!#4%91INVhWH`#P-tj!i1Ro+F3+e(?=^U>UgDGa zG(N#M>>J0M2C))?H!WgxR5;olimucyU00xMq$|{Ab~ZR2POe+st?wS~9_uDO)*f4r z(4*?Ow0k(O+N<&EyrbSR@3=SU4SB6To3Gv1>2vrFnIVYW@ThZ<4c6B!j-NW61ZheoThxE#N z6}?=q&^z24=pF4f_Eq#X^eOsN`_=t{{*nH%ezU8=Wpi z)&bi<`#|S_V?Z;g9pnec2ZMv5!O1}~)IQWXR`eHA`4ZLcG8HWP=;p>U)+IETOM*kkN}01G7X^Z)<= diff --git a/bin/mimalloc-redirect32.dll b/bin/mimalloc-redirect32.dll index b19e42cd2a9106bbf80260c29b32ea9f9cfc8a2e..9dfb934e8b241c583184d215f183872f17709023 100644 GIT binary patch delta 5422 zcma)AeNBYF)Ob^qhv#HtVy_ZBg ze{Bx;$^AWF@AEwG^L_-sU*I1XOfHr@f9AqLV)e5`2r6^QbrfL%#>Z1%>=ty9Xe(Kt zCnJd(tYaYwgK(;HgTzJ>#|6N!5rhU^71Rkq!!}h}y1w4$8u5Xk#sq}ICg@{wXYC$% zyYFKGe!bJ-a{%wszXIukrw*RKM)iV<9B@Vm0Ubm80l}ce8bD8>?+HrQeU=5UFg&N= zpZPloajZy05X-Mifz*`%`E4GMXTW6nAm}Y8vVjD0fYet2d8`bG6!Xl*Kzi}phueWf zv6rW@MG+SKu^fnJEs#g?+s`rYCU)A6$JmOY36oce0-%aoAY-LKUIY)Qpx{L$%PuaM z1D}C8Nw1jw2Z%c`Cq$2ASE8_R4I0cA%7hY9vL+s|OZ zsmQRfF%rw3#Q>27Wiyd%BdOz*{RAU z;08i3uc<{{YfV`oLocs&mSCNoOgzY}gsdxiYw=Lk2|gA-st{>4yf|@sJQX6SaqHbs zA;+;NLQFi10Y3IRei}(ahkb14xyt-kxPkH2?*0Gh}CjIowka`ISbxR;8$dj z^T!ESO95FHAxlnQ@${clDJU5qSxd++X@z8M?CzO&a>hxLK+U<^Houg>I-($x42&iT zUqy&X*g{qbXeV3QLo<`P8wKJ7#0GT~#Ol!NdC~$k6quir^8e)qSe=s`6wfOl-$H_Q z+t=`cquuMYq;IBY-2h2`iWc+tk}n~evXM8?FS{4s3;s;OT zU-S5K8o$QlAExmHkB_BsKZnKp)7Uz*wBetHHEqz~*O__T=7CWOnSbgf*ht($z!Rq~ z-c6-!?&KIju>tgE@oovvDE0%R3H_;9u@N&)z2_@V>ngD5>)Xh(NkQL*iMV^}z5G+} zfU|FT+=I1VMQ1iC$cyORO${4yOYj;K*kg;k9c%1F>q}OLh`n$!Q2VN4;bxdzW4HwGbFxAck|UWxoPeWURz3JOM+! z#)*X>6AZ6~D&s)%FXv&MQ}6nU(qrV|#gr=KhMg0JnEBAB2Y)3Y(23IJhbKQvr9xIw zketR=2j5xXKK9TZJ_dsQ5h%ctOG`5Hz#Y+Qn5>UiLGR}5h0RWg=rwEdd6|Hf{)KF? 
zA{%{LwpAb_k+@tzu1Dtb<2n9Xg6bvMXV#;+^7CW?a&O)S^!d#@fxfl5188wYA4#E; z72Bk6%1l_}Efe-cYcJ7DqzWbA>g|e$KrEu4C9~1<^sCsruJZf!GLi~1cLJk#h|%oj z*P%G>x$i^SsQ={FZjfOo>`SoAu(niT8JK)t`8k<4^M@_F$sHoXCxWsBv;=GfK8d2aX-fTo6fsK9o57>(Y_OD`OtJ?gqh&U#QG0^1M$4Yv$amn3LJX$-icb$>I z*+alI!-f@X65aC|D!@BkAR@7`fBt>i=XSJI^;)iI9Jh|7h5hCxI$OO53URY~OD=`E z;GYfMMB;7Df*;*P?b|ks%P^Y(FH+>+rX>~V&22A_IYV|TU=S1eCw~B=_c1HZECz~_ zxXvU2!q9ZlhTkq5jZZE629CJ?5+_K_So1boOoZzn&4uT;0m;`e*?!NBpzhq zfzdeOQ}o%vnn{GL@nACrev*(*BGiNn6))#Dao&O35QK94_;)zP;^p>KDuq7Tp+%OO z4Wtvb)$ANQiRBb07r7?^KqNM{m}lf)o`;^cV)yQISn;!uu_F;2Oc2)U5fFma4WV&W z$l~mTyj>w@7shruyuXG>Ob{kvC9Jmm!UFodR*ovRiVERsLG;ZNFhg8LhS!G4<)5ME z+D&6HyM#}Fok`%@#d098z*RB`bTlCxt%MRnx~o+>hpRg$$-f*u_1>@$MtdX{jXiXY zNs-r?6@04!b7m#LLZMdzqbtPmN;r`39eene#+gIb!$L44V$u9uIC@sS3D76CX0&<7 z#<8zKvyXwoTSXIXq%>$x;tA9cT-Y~27_ufqEkeCr!g2l$JBBodH(|9A5itxMmV|Z+ zwg@55e*`h)hwoz(aJcqbpp`*f(8}If#I?moYdKhkM62td++78I$5#CZS9!>@pYy&B ztXkMQ6x^Y%`QYo-nM!i4FT`sXpWGSOUA${9rLeF7S#re=uGL0yuydkuCKVWkB`L>e zFBG5_R~a^pPF~DS@vzk0fdeVydc5xsNmSQs`w zJtN|(^EQ^jc9@5m+?dTUUBJZCWnaNJ82budMUY9rKoY*4&?T+>vVxWOW|GU44+u~C z<1n>iZu*bJei?J{)1U4hSaIObYESeriHt)*jD>C@j_u$bCt$sT<3rzajN1XP*V_Uas?FJK9dj^QOXR?OXs@ck@y z%zX)qtyBxU^FG4}Hk`*2yn#v5hQHyiNt~h49p(H|Huj4TQYjbAKR4tOwq&sgZ;0=M z2qZT4>^)0-bynev;qWeZoEt*a%Km~^%)cA~FNHWBTv&G!{Oo=7 zKlSEaaP*|A;dc^H5l7*ef_I9eQjQjL6b>G0#<9D)0EW+*dDikh5=8}_4e1|hxQ+YZ zvF(FrFK$GFNXsPfj`yl7AiJ`6=Bi*bdA{wh1k_{YzqEe!_#@VfwO`DY{b1HQ<78xc zB`p~w+b1QvGH4u-KZBM4EoG3VO-e>GXtJ#kk#^oA3ld8PS?p4h z&Y-h^x-uvZ$e%$?`=t<9+RHScNCqVUEoG4H(L!{!eeaY^gf2YVJ|&a9(2gl;Z>KiN#2STBN+1Qj3Pz6KB6=X@SO z#yQ;~K_@$!vF=ERBzur1sOhxFC@^SX*wT!egPb@2K?;iWKxfmGEEg>T$as@=QlyQS zNKmjdS4eGzpMRFlhp^^SCugnsE(K*=wVnhpm0s0RL(rFLd9R zQhT@LTUms=VK7jhOh+wpN`%vCS6xm?b_Bq5S~?4iI55JP0Sn?Na=ZRTJml&g_PXkw z9uF!#{D92sa`t##-M*fq1N31>kH_VtckQBwhk6E%&|cS(VUNR0_d7hEqusj?dtI*j z8OPzTvccJpQM*Ti+#VJ5$4@=whld9ohdeIYca$D<_`2P6^$=ZCJy@%!?+K2$eDo9E z9-oUIbb0%GhK68F;-1)h)Z?KKIlBA6gV)ysV?Iec1_wPo5EAFpHB^b-^mvi1-vmFv 
zysn|6o?|XF(!T}PH~L9`$qrb?{YRa{5XP43p)JI4x7T;bG2{aFsy0rF6QTd=SM9q$ z^`0Tx*)xPwaMp`yy2j^y66fOS8R(;*=<&Jf{vN&xw9Dazki0b1%jb5{-ADWT9Rp6R z=`z=%j)4*=$P)wQWc|!l29Xu>@Sm9*@Fd|WlaoX}JWcR8;dw?*LKDb0jDRftIR|_~ zMov{0{(nqo=DjDfvL7IcQ4+SUE#%DL6E6zKPRdWqr{w45VR>F-MdS9yx<*B#q4B}S ze`+jL)GFMH#}!X0f{N!Af2+t=u2+^SE0sHxyOm1i8``M$A)U@3HI^8&Oax63%@EdS ziY-cy@~X04WmW}M^QwPWeWbdnx}{oCiPfd*O0`kFU)`yGN_|p2r9Pv1MRR>jvt9f5 z+5@@+hTj^yOhcyU@ISU>+<(4^O8KsapEeXVKC8%AmMJyLL8V=NQ1ganL6g*O(><)a zru&_4Nte<|^_%n>y;a|;e_H<&{UO7G;XT8m;rE7HhTDd?;Y$Pl_ZR#Te%(pk8Qm=% zt4rvb^v(JX{YCv)SRc_-2C1RS;4_RE#=-E0;f}#zv>0v1Y2z!#OGcqdWGXhfO&-&6 z(}F2#S~AJZ3iBTG8S`251#`ljG!uh(8VEun>yUNHBCzhfywTsd)Of3LkD^I&L2*$LR?I3SN=hk( ztOu1o<%sgQ(y#nLxv0FMTvFarYE%Z5MYTuOq&laXRuO8UTBN4bPPJR@QTx>M>IHRF zy{N9!$TSL#Mq|)SYR+g@Gzm>oBhuvfreLEBv z0z}{eG8xYGPFaq#(K*XV>TGgh4qZqq5{{uK&N-w_7PoN|PUmi7Vym{&R%>Nz-+d1) z$(7UPn^z?F_H)@B;+-e4fJXmkk4{~jDZKN&3*}M zl2w_nfKT6|Vu_DIvLifin zS#HCQ)OJv}dSYxPbW-5KVERBKKAdj+e30}xhX2KGEc`*twlbKDuhf6Zcwn!1q(obkp`vKRurR`wts|I$1L_txOr>=$t) zyC~(0OJBQnOIm3f34(Tn3b14Aw)8Ct_#hQGY|TmkC)lT=_-7KnoxpEP_|*h{S;T3V z61WdXz}@KtJRym{n!q-!d1PC9X#y5V-nJz01GzN1xLqN*pGo+30>3TcR}=VU313R! 
zJ`ty#PT&(WjPmd3-458(xA}*ZuTMc`4ZjK%L!f~*J{))$k2?gR4>6%z_+M2A6cS^I zFrLNbJJ{`nG5jO1Dxo_?MEk?Idk3q20Qr9~{G+VlJK*d~9s@+{9$wnPQUM&>QMH|H z0ZC(=cx(_BiN;fy&)pm%2t#a&?h{egzu<{nwm-JS&u#!-;^*k+$YVZVd;~S(V-dCz z8s}#n!ay4sEv{{D#${jMGV-P_@hcD+3d8D?QiXhdE2sy!cL*R{5;z*O-+}iE=JF6G z`j}7;p8fo?BUHQ=KTG!Z^@+cMx7kK0@+z}#*<-^$^eX&Ld1I(d9FPvx!E=NMAN<4N zpf}}q@?}t_vTkLNCuna=m}ii;D4rvFT%_m7@cv(%ct?RSrCq|!`Fr~V{~V9|?ebA- zirAib_m22;`q#xNwxQ6Qpa4-NB>zKBg}(KF%4MN%{x9;u^?V2ij}{ck0tmlez*5=x zvx45#)pEqZu+L{>cj0w->*YnIKo1n{1A4ru3Fur=JC%iZsY{hvup0JA!?-ir*oN9r zJVybpo>D&w;%W6I>aXyD;@61xpBDddWdJdw{JO7i9rY!z-GoF4t|3^%z|YC_py-6g zovToALXLP&KA8MT$uB9zO#jXU)Lt5S<&Ywu89`0ShlTLh(NJ{cz6Ckd9l2lT2<5F3 zFA6ASDjeBW_&ANu%fdYD|K#(zZOLR21c8he{Y=zb;uUcIASiE8yGHBj&wcu{NzwBK zq3*pT>4MKPJh1zX^eJ#aN?qvXPqC@=AaxV>m6oJW6E64b~&P5 zh}i9u>~Md@S8&Ca)NOO*GSj|{V1<0WX)4r{5}?+u;(L2`^h50a1|{cI^w3qoa3?G_ z@@h7-7fyxf;6#?rDY|bPB<}MQtq*)cAmLBKl}mh-+-yJ})?f`X8};=?QSUZL3M5D& zCMaZ2=#zcID@N;-0m_ekd#pIPH|NMSNc}t`=2<=tIUaS!$mk|1*ijI}DCFKWZjUy! z)sp=fBb)ZEpOAcrR}Pqfn@pf5lLQ*_;%D|5bMwXXSUxTuv0$07YxoI%Z{JsOV1G*g zB-9L!my20>jl={y~LJ z+k6QY^xcm|myi$`(yttB4CUM<9C+iS;WtULd$NTKMDtf4v0G9WpElSue{sy{HxJG*(t002*04IsepPL-whuKK;d zNL@d|%a}*M^=HrM%YX3L>rLKjS{6$spAhdGLa>2H!uT6cHn$dZ&amBD%4O z93ofc&dydhR zM?{(@B{U(TtD7hwqUe*G!x6KlV=f$ow(B?p*PWK8LwE#w98BF*ijxU@&q+lxqee_E9z1y{b>5AbJJ43~#gk89Q?nvD zQiK>ak>P92t`}PvIF7Sh4q-R+qyUzorv_dQ(RVu7od%Z*;nQI$G5wySqyO!QG`h4;+4{+pxzgob6VF zsk)=J#jW;ssy&_F)^pyoEgkCCuCC6m?n<@2rK6+s6khGlg)d};P6@52+RvUlqdwi) z_4z1i_z73HFC0HJ%(V$ky8HdP6&9-154^EY9+tF~SMQQ~(E*eGzN zc-#NOaAx`ZU{dk|LVXmx)g{!7=h>I&ey{eAw1Zl#ozgz2%F=Du7Rmvig9xNdme@b?DVm}AT{)*BthW5zQ^uW88iifPf5Vt%XN{1#`l1#LgJ zt=o_Wp~K*0P&2NLYtL8xR@cv7W~bRzw#;ZXHkmG&zGEsibLLv}Nwe2{(L8B>!~9+I zC+6Rq-?8lHsyGu@%^l<(;~d;kj{F58fUg0|kY&R2^X z_Sp zkoF1VRpX>_$`~@P8#j!#rh}%#rb*K^ScJBSX~PsV)te8S9p)oumwC$kih0_6!yGV2 z%`r2w&=$GHVL4)HvP@fUSOS(=%bW#qG$-d6&c!ux&77MPxBxfH&2jTwkds>#R;9Js a>b45j4y(sHXPvhOt&7%W>x{N;g8DD65%kCa From 0e188a18a7f8f2c4cf1e2294a5f4c1641d7e5518 Mon Sep 17 00:00:00 2001 
From: daan Date: Thu, 17 Oct 2019 09:21:20 -0700 Subject: [PATCH 010/293] update test to match malloc with free --- test/main-override.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/main-override.cpp b/test/main-override.cpp index 2cafd2cd..4bc91ae8 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -41,7 +41,7 @@ int main() { p2 = malloc(16); p1 = realloc(p1, 32); free(p1); - mi_free(p2); + free(p2); mi_free(s); Test* t = new Test(42); delete t; From a96c90db5dc29223936cf32e74b2aa78aab93837 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 17 Oct 2019 09:22:23 -0700 Subject: [PATCH 011/293] remove old windows overriding method --- src/alloc-override-win.c | 715 --------------------------------------- 1 file changed, 715 deletions(-) delete mode 100644 src/alloc-override-win.c diff --git a/src/alloc-override-win.c b/src/alloc-override-win.c deleted file mode 100644 index dc4796ab..00000000 --- a/src/alloc-override-win.c +++ /dev/null @@ -1,715 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -#include "mimalloc.h" -#include "mimalloc-internal.h" - -#if !defined(_WIN32) -#error "this file should only be included on Windows" -#endif - -#include <windows.h> -#include <psapi.h> - -#include <stdlib.h> // getenv -#include <stdio.h> // _setmaxstdio -#include <string.h> // strstr - - -/* -To override the C runtime `malloc` on Windows we need to patch the allocation -functions at runtime initialization. 
Unfortunately we can never patch before the -runtime initializes itself, because as soon as we call `GetProcAddress` on the -runtime module (a DLL or EXE in Windows speak), it will first load and initialize -(by the OS calling `DllMain` on it). - -This means that some things might be already allocated by the C runtime itself -(and possibly other DLL's) before we get to resolve runtime adresses. This is -no problem if everyone unwinds in order: when we unload, we unpatch and restore -the original crt `free` routines and crt malloc'd memory is freed correctly. - -But things go wrong if such early CRT alloc'd memory is freed or re-allocated -_after_ we patch, but _before_ we unload (and unpatch), or if any memory allocated -by us is freed after we unpatched. - -There are two tricky situations to deal with: - -1. The Thread Local Storage (TLS): when the main thread stops it will call registered - callbacks on TLS entries (allocated by `FlsAlloc`). This is done by the OS - before any DLL's are unloaded. Unfortunately, the C runtime registers such - TLS entries with CRT allocated memory which is freed in the callback. - -2. Inside the CRT: - a. Some variables might get initialized by patched allocated - blocks but freed during CRT unloading after we unpatched - (like temporary file buffers). - b. Some blocks are allocated at CRT and freed by the CRT (like the - environment storage). - c. And some blocks are allocated by the CRT and then reallocated - while patched, and finally freed after unpatching! This - happens with the `atexit` functions for example to grow the array - of registered functions. - -In principle situation 2 is hopeless: since we cannot patch before CRT initialization, -we can never be sure how to free or reallocate a pointer during CRT unloading. -However, in practice there is a good solution: when terminating, we just patch -the reallocation and free routines to no-ops -- we are winding down anyway! 
This leaves -just the reallocation problm of CRT alloc'd memory once we are patched. Here, a study of the -CRT reveals that there seem to be just three such situations: - -1. When registering `atexit` routines (to grow the exit function table), -2. When calling `_setmaxstdio` (to grow the file handle table), -3. and `_popen`/`_wpopen` (to grow handle pairs). These turn out not to be - a problem as these are NULL initialized. - -We fix these by providing wrappers: - -1. We first register a _global_ `atexit` routine ourselves (`mi_patches_at_exit`) before patching, - and then patch the `_crt_atexit` function to implement our own global exit list (and the - same for `_crt_at_quick_exit`). All module local lists are no problem since they are always fully - (un)patched from initialization to end. We can register in the global list by dynamically - getting the global `_crt_atexit` entry from `ucrtbase.dll`. - -2. The `_setmaxstdio` is _detoured_: we patch it by a stub that unpatches first, - calls the original routine and repatches again. - -That leaves us to reliably shutdown and enter "termination mode": - -1. Using our trick to get the global exit list entry point, we register an exit function `mi_patches_atexit` - that first executes all our home brew list of exit functions, and then enters a _termination_ - phase that patches realloc/free variants with no-ops. Patching later again with special no-ops for - `free` also improves efficiency during the program run since no flags need to be checked. - -2. That is not quite good enough yet since after executing exit routines after us on the - global exit list (registered by the CRT), - the OS starts to unwind the TLS callbacks and we would like to run callbacks registered after loading - our DLL to be done in patched mode. So, we also allocate a TLS entry when our DLL is loaded and when its - callback is called, we re-enable the original patches again. 
Since TLS is destroyed in FIFO order - this runs any callbacks in later DLL's in patched mode. - -3. Finally the DLL's get unloaded by the OS in order (still patched) until our DLL gets unloaded - and then we start a termination phase again, and patch realloc/free with no-ops for good this time. - -*/ - -static int __cdecl mi_setmaxstdio(int newmax); - -// ------------------------------------------------------ -// Microsoft allocation extensions -// ------------------------------------------------------ - - -typedef size_t mi_nothrow_t; - -static void mi_free_nothrow(void* p, mi_nothrow_t tag) { - UNUSED(tag); - mi_free(p); -} - -// Versions of `free`, `realloc`, `recalloc`, `expand` and `msize` -// that are used during termination and are no-ops. -static void mi_free_term(void* p) { - UNUSED(p); -} - -static void mi_free_size_term(void* p, size_t size) { - UNUSED(size); - UNUSED(p); -} - -static void mi_free_nothrow_term(void* p, mi_nothrow_t tag) { - UNUSED(tag); - UNUSED(p); -} - -static void* mi_realloc_term(void* p, size_t newsize) { - UNUSED(p); UNUSED(newsize); - return NULL; -} - -static void* mi__recalloc_term(void* p, size_t newcount, size_t newsize) { - UNUSED(p); UNUSED(newcount); UNUSED(newsize); - return NULL; -} - -static void* mi__expand_term(void* p, size_t newsize) { - UNUSED(p); UNUSED(newsize); - return NULL; -} - -static size_t mi__msize_term(void* p) { - UNUSED(p); - return 0; -} - - -static void* mi__malloc_dbg(size_t size, int block_type, const char* fname, int line) { - UNUSED(block_type); UNUSED(fname); UNUSED(line); - return _malloc_base(size); -} - -static void* mi__calloc_dbg(size_t count, size_t size, int block_type, const char* fname, int line) { - UNUSED(block_type); UNUSED(fname); UNUSED(line); - return _calloc_base(count, size); -} - -static void* mi__realloc_dbg(void* p, size_t size, int block_type, const char* fname, int line) { - UNUSED(block_type); UNUSED(fname); UNUSED(line); - return _realloc_base(p, size); -} - -static void 
mi__free_dbg(void* p, int block_type) { - UNUSED(block_type); - _free_base(p); -} - - -// the `recalloc`,`expand`, and `msize` don't have base versions and thus need a separate term version - -static void* mi__recalloc_dbg(void* p, size_t count, size_t size, int block_type, const char* fname, int line) { - UNUSED(block_type); UNUSED(fname); UNUSED(line); - return mi_recalloc(p, count, size); -} - -static void* mi__expand_dbg(void* p, size_t size, int block_type, const char* fname, int line) { - UNUSED(block_type); UNUSED(fname); UNUSED(line); - return mi__expand(p, size); -} - -static size_t mi__msize_dbg(void* p, int block_type) { - UNUSED(block_type); - return mi_usable_size(p); -} - -static void* mi__recalloc_dbg_term(void* p, size_t count, size_t size, int block_type, const char* fname, int line) { - UNUSED(block_type); UNUSED(fname); UNUSED(line); - return mi__recalloc_term(p, count, size); -} - -static void* mi__expand_dbg_term(void* p, size_t size, int block_type, const char* fname, int line) { - UNUSED(block_type); UNUSED(fname); UNUSED(line); - return mi__expand_term(p, size); -} - -static size_t mi__msize_dbg_term(void* p, int block_type) { - UNUSED(block_type); - return mi__msize_term(p); -} - - -// ------------------------------------------------------ -// implement our own global atexit handler -// ------------------------------------------------------ -typedef void (cbfun_t)(void); -typedef int (atexit_fun_t)(cbfun_t* fn); -typedef uintptr_t encoded_t; - -typedef struct exit_list_s { - encoded_t functions; // encoded pointer to array of encoded function pointers - size_t count; - size_t capacity; -} exit_list_t; - -#define MI_EXIT_INC (64) - -static exit_list_t atexit_list = { 0, 0, 0 }; -static exit_list_t at_quick_exit_list = { 0, 0, 0 }; -static CRITICAL_SECTION atexit_lock; - -// encode/decode function pointers with a random canary for security -static encoded_t canary; - -static inline void *decode(encoded_t x) { - return (void*)(x^canary); -} - 
-static inline encoded_t encode(void* p) { - return ((uintptr_t)p ^ canary); -} - - -static void init_canary() -{ - canary = _mi_random_init(0); - atexit_list.functions = at_quick_exit_list.functions = encode(NULL); -} - - -// initialize the list -static void mi_initialize_atexit(void) { - InitializeCriticalSection(&atexit_lock); - init_canary(); -} - -// register an exit function -static int mi_register_atexit(exit_list_t* list, cbfun_t* fn) { - if (fn == NULL) return EINVAL; - EnterCriticalSection(&atexit_lock); - encoded_t* functions = (encoded_t*)decode(list->functions); - if (list->count >= list->capacity) { // at first `functions == decode(0) == NULL` - encoded_t* newf = (encoded_t*)mi_recalloc(functions, list->capacity + MI_EXIT_INC, sizeof(cbfun_t*)); - if (newf != NULL) { - list->capacity += MI_EXIT_INC; - list->functions = encode(newf); - functions = newf; - } - } - int result; - if (list->count < list->capacity && functions != NULL) { - functions[list->count] = encode(fn); - list->count++; - result = 0; // success - } - else { - result = ENOMEM; - } - LeaveCriticalSection(&atexit_lock); - return result; -} - -// Register a global `atexit` function -static int mi_atexit(cbfun_t* fn) { - return mi_register_atexit(&atexit_list,fn); -} - -static int mi_at_quick_exit(cbfun_t* fn) { - return mi_register_atexit(&at_quick_exit_list,fn); -} - -static int mi_register_onexit(void* table, cbfun_t* fn) { - // TODO: how can we distinguish a quick_exit from atexit? 
- return mi_atexit(fn); -} - -// Execute exit functions in a list -static void mi_execute_exit_list(exit_list_t* list) { - // copy and zero the list structure - EnterCriticalSection(&atexit_lock); - exit_list_t clist = *list; - memset(list,0,sizeof(*list)); - LeaveCriticalSection(&atexit_lock); - - // now execute the functions outside of the lock - encoded_t* functions = (encoded_t*)decode(clist.functions); - if (functions != NULL) { - for (size_t i = clist.count; i > 0; i--) { // careful with unsigned count down.. - cbfun_t* fn = (cbfun_t*)decode(functions[i-1]); - if (fn==NULL) break; // corrupted! - fn(); - } - mi_free(functions); - } -} - - - -// ------------------------------------------------------ -// Jump assembly instructions for patches -// ------------------------------------------------------ - -#if defined(_M_IX86) || defined(_M_X64) - -#define MI_JUMP_SIZE 14 // at most 2+4+8 for a long jump or 1+5 for a short one - -typedef struct mi_jump_s { - uint8_t opcodes[MI_JUMP_SIZE]; -} mi_jump_t; - -void mi_jump_restore(void* current, const mi_jump_t* saved) { - memcpy(current, &saved->opcodes, MI_JUMP_SIZE); -} - -void mi_jump_write(void* current, void* target, mi_jump_t* save) { - if (save != NULL) { - memcpy(&save->opcodes, current, MI_JUMP_SIZE); - } - uint8_t* opcodes = ((mi_jump_t*)current)->opcodes; - ptrdiff_t diff = (uint8_t*)target - (uint8_t*)current; - uint32_t ofs32 = (uint32_t)diff; - #ifdef _M_X64 - uint64_t ofs64 = (uint64_t)diff; - if (ofs64 != (uint64_t)ofs32) { - // use long jump - opcodes[0] = 0xFF; - opcodes[1] = 0x25; - *((uint32_t*)&opcodes[2]) = 0; - *((uint64_t*)&opcodes[6]) = (uint64_t)target; - } - else - #endif - { - // use short jump - opcodes[0] = 0xE9; - *((uint32_t*)&opcodes[1]) = ofs32 - 5 /* size of the short jump instruction */; - } -} - -#elif defined(_M_ARM64) - -#define MI_JUMP_SIZE 16 - -typedef struct mi_jump_s { - uint8_t opcodes[MI_JUMP_SIZE]; -} mi_jump_t; - -void mi_jump_restore(void* current, const mi_jump_t* 
saved) { - memcpy(current, &saved->opcodes, MI_JUMP_SIZE); -} - -void mi_jump_write(void* current, void* target, mi_jump_t* save) { - if (save != NULL) { - memcpy(&save->opcodes, current, MI_JUMP_SIZE); - } - uint8_t* opcodes = ((mi_jump_t*)current)->opcodes; - uint64_t diff = (uint8_t*)target - (uint8_t*)current; - - // 0x50 0x00 0x00 0x58 ldr x16, .+8 # load PC relative +8 - // 0x00 0x02 0x3F 0xD6 blr x16 # and jump - //
- //
- static const uint8_t jump_opcodes[8] = { 0x50, 0x00, 0x00, 0x58, 0x00, 0x02, 0x3F, 0xD6 }; - memcpy(&opcodes[0], jump_opcodes, sizeof(jump_opcodes)); - *((uint64_t*)&opcodes[8]) = diff; -} - -#else -#error "define jump instructions for this platform" -#endif - - -// ------------------------------------------------------ -// Patches -// ------------------------------------------------------ -typedef enum patch_apply_e { - PATCH_NONE, - PATCH_TARGET, - PATCH_TARGET_TERM -} patch_apply_t; - -#define MAX_ENTRIES 4 // maximum number of patched entry points (like `malloc` in ucrtbase and msvcrt) - -typedef struct mi_patch_s { - const char* name; // name of the function to patch - void* target; // the address of the new target (never NULL) - void* target_term; // the address of the target during termination (or NULL) - patch_apply_t applied; // what target has been applied? - void* originals[MAX_ENTRIES]; // the resolved addresses of the function (or NULLs) - mi_jump_t saves[MAX_ENTRIES]; // the saved instructions in case it was applied -} mi_patch_t; - -#define MI_PATCH_NAME3(name,target,term) { name, &target, &term, PATCH_NONE, {NULL,NULL,NULL,NULL} } -#define MI_PATCH_NAME2(name,target) { name, &target, NULL, PATCH_NONE, {NULL,NULL,NULL,NULL} } -#define MI_PATCH3(name,target,term) MI_PATCH_NAME3(#name, target, term) -#define MI_PATCH2(name,target) MI_PATCH_NAME2(#name, target) -#define MI_PATCH1(name) MI_PATCH2(name,mi_##name) - -static mi_patch_t patches[] = { - // we implement our own global exit handler (as the CRT versions do a realloc internally) - //MI_PATCH2(_crt_atexit, mi_atexit), - //MI_PATCH2(_crt_at_quick_exit, mi_at_quick_exit), - MI_PATCH2(_setmaxstdio, mi_setmaxstdio), - MI_PATCH2(_register_onexit_function, mi_register_onexit), - - // override higher level atexit functions so we can implement at_quick_exit correcty - MI_PATCH2(atexit, mi_atexit), - MI_PATCH2(at_quick_exit, mi_at_quick_exit), - - // regular entries - MI_PATCH2(malloc, mi_malloc), - 
MI_PATCH2(calloc, mi_calloc), - MI_PATCH3(realloc, mi_realloc,mi_realloc_term), - MI_PATCH3(free, mi_free,mi_free_term), - - // extended api - MI_PATCH2(_strdup, mi_strdup), - MI_PATCH2(_strndup, mi_strndup), - MI_PATCH3(_expand, mi__expand,mi__expand_term), - MI_PATCH3(_recalloc, mi_recalloc,mi__recalloc_term), - MI_PATCH3(_msize, mi_usable_size,mi__msize_term), - - // base versions - MI_PATCH2(_malloc_base, mi_malloc), - MI_PATCH2(_calloc_base, mi_calloc), - MI_PATCH3(_realloc_base, mi_realloc,mi_realloc_term), - MI_PATCH3(_free_base, mi_free,mi_free_term), - - // these base versions are in the crt but without import records - MI_PATCH_NAME3("_recalloc_base", mi_recalloc,mi__recalloc_term), - MI_PATCH_NAME3("_msize_base", mi_usable_size,mi__msize_term), - - // debug - MI_PATCH2(_malloc_dbg, mi__malloc_dbg), - MI_PATCH2(_realloc_dbg, mi__realloc_dbg), - MI_PATCH2(_calloc_dbg, mi__calloc_dbg), - MI_PATCH2(_free_dbg, mi__free_dbg), - - MI_PATCH3(_expand_dbg, mi__expand_dbg, mi__expand_dbg_term), - MI_PATCH3(_recalloc_dbg, mi__recalloc_dbg, mi__recalloc_dbg_term), - MI_PATCH3(_msize_dbg, mi__msize_dbg, mi__msize_dbg_term), - -#if 0 - // override new/delete variants for efficiency (?) 
-#ifdef _WIN64 - // 64 bit new/delete - MI_PATCH_NAME2("??2@YAPEAX_K@Z", mi_new), - MI_PATCH_NAME2("??_U@YAPEAX_K@Z", mi_new), - MI_PATCH_NAME3("??3@YAXPEAX@Z", mi_free, mi_free_term), - MI_PATCH_NAME3("??_V@YAXPEAX@Z", mi_free, mi_free_term), - MI_PATCH_NAME3("??3@YAXPEAX_K@Z", mi_free_size, mi_free_size_term), // delete sized - MI_PATCH_NAME3("??_V@YAXPEAX_K@Z", mi_free_size, mi_free_size_term), // delete sized - MI_PATCH_NAME2("??2@YAPEAX_KAEBUnothrow_t@std@@@Z", mi_new), - MI_PATCH_NAME2("??_U@YAPEAX_KAEBUnothrow_t@std@@@Z", mi_new), - MI_PATCH_NAME3("??3@YAXPEAXAEBUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term), - MI_PATCH_NAME3("??_V@YAXPEAXAEBUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term), - - -#else - // 32 bit new/delete - MI_PATCH_NAME2("??2@YAPAXI@Z", mi_new), - MI_PATCH_NAME2("??_U@YAPAXI@Z", mi_new), - MI_PATCH_NAME3("??3@YAXPAX@Z", mi_free, mi_free_term), - MI_PATCH_NAME3("??_V@YAXPAX@Z", mi_free, mi_free_term), - MI_PATCH_NAME3("??3@YAXPAXI@Z", mi_free_size, mi_free_size_term), // delete sized - MI_PATCH_NAME3("??_V@YAXPAXI@Z", mi_free_size, mi_free_size_term), // delete sized - - MI_PATCH_NAME2("??2@YAPAXIABUnothrow_t@std@@@Z", mi_new), - MI_PATCH_NAME2("??_U@YAPAXIABUnothrow_t@std@@@Z", mi_new), - MI_PATCH_NAME3("??3@YAXPAXABUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term), - MI_PATCH_NAME3("??_V@YAXPAXABUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term), - -#endif -#endif - { NULL, NULL, NULL, PATCH_NONE, {NULL,NULL,NULL,NULL} } -}; - - -// Apply a patch -static bool mi_patch_apply(mi_patch_t* patch, patch_apply_t apply) -{ - if (patch->originals[0] == NULL) return true; // unresolved - if (apply == PATCH_TARGET_TERM && patch->target_term == NULL) apply = PATCH_TARGET; // avoid re-applying non-term variants - if (patch->applied == apply) return false; - - for (int i = 0; i < MAX_ENTRIES; i++) { - void* original = patch->originals[i]; - if (original == NULL) break; // no more - - DWORD protect = 
PAGE_READWRITE; - if (!VirtualProtect(original, MI_JUMP_SIZE, PAGE_EXECUTE_READWRITE, &protect)) return false; - if (apply == PATCH_NONE) { - mi_jump_restore(original, &patch->saves[i]); - } - else { - void* target = (apply == PATCH_TARGET ? patch->target : patch->target_term); - mi_assert_internal(target != NULL); - if (target != NULL) mi_jump_write(original, target, &patch->saves[i]); - } - VirtualProtect(original, MI_JUMP_SIZE, protect, &protect); - } - patch->applied = apply; - return true; -} - -// Apply all patches -static bool _mi_patches_apply(patch_apply_t apply, patch_apply_t* previous) { - static patch_apply_t current = PATCH_NONE; - if (previous != NULL) *previous = current; - if (current == apply) return true; - current = apply; - bool ok = true; - for (size_t i = 0; patches[i].name != NULL; i++) { - if (!mi_patch_apply(&patches[i], apply)) ok = false; - } - return ok; -} - -// Export the following three functions just in case -// a user needs that level of control. - -// Disable all patches -mi_decl_export void mi_patches_disable(void) { - _mi_patches_apply(PATCH_NONE, NULL); -} - -// Enable all patches normally -mi_decl_export bool mi_patches_enable(void) { - return _mi_patches_apply( PATCH_TARGET, NULL ); -} - -// Enable all patches in termination phase where free is a no-op -mi_decl_export bool mi_patches_enable_term(void) { - return _mi_patches_apply(PATCH_TARGET_TERM, NULL); -} - -// ------------------------------------------------------ -// Stub for _setmaxstdio -// ------------------------------------------------------ - -static int __cdecl mi_setmaxstdio(int newmax) { - patch_apply_t previous; - _mi_patches_apply(PATCH_NONE, &previous); // disable patches - int result = _setmaxstdio(newmax); // call original function (that calls original CRT recalloc) - _mi_patches_apply(previous,NULL); // and re-enable patches - return result; -} - - -// ------------------------------------------------------ -// Resolve addresses dynamically -// 
------------------------------------------------------ - -// Try to resolve patches for a given module (DLL) -static void mi_module_resolve(const char* fname, HMODULE mod, int priority) { - // see if any patches apply - for (size_t i = 0; patches[i].name != NULL; i++) { - mi_patch_t* patch = &patches[i]; - if (patch->applied == PATCH_NONE) { - // find an available entry - int i = 0; - while (i < MAX_ENTRIES && patch->originals[i] != NULL) i++; - if (i < MAX_ENTRIES) { - void* addr = GetProcAddress(mod, patch->name); - if (addr != NULL) { - // found it! set the address - patch->originals[i] = addr; - _mi_trace_message(" found %s at %s!%p (entry %i)\n", patch->name, fname, addr, i); - } - } - } - } -} - -#define MIMALLOC_NAME "mimalloc-override.dll" -#define UCRTBASE_NAME "ucrtbase.dll" -#define UCRTBASED_NAME "ucrtbased.dll" - -// Resolve addresses of all patches by inspecting the loaded modules -static atexit_fun_t* crt_atexit = NULL; -static atexit_fun_t* crt_at_quick_exit = NULL; - - -static bool mi_patches_resolve(void) { - // get all loaded modules - HANDLE process = GetCurrentProcess(); // always -1, no need to release - DWORD needed = 0; - HMODULE modules[400]; // try to stay under 4k to not trigger the guard page - EnumProcessModules(process, modules, sizeof(modules), &needed); - if (needed == 0) return false; - int count = needed / sizeof(HMODULE); - int ucrtbase_index = 0; - int mimalloc_index = 0; - // iterate through the loaded modules - for (int i = 0; i < count; i++) { - HMODULE mod = modules[i]; - char filename[MAX_PATH] = { 0 }; - DWORD slen = GetModuleFileName(mod, filename, MAX_PATH); - if (slen > 0 && slen < MAX_PATH) { - // filter out potential crt modules only - filename[slen] = 0; - const char* lastsep = strrchr(filename, '\\'); - const char* basename = (lastsep==NULL ? 
filename : lastsep+1); - _mi_trace_message(" %i: dynamic module %s\n", i, filename); - - // remember indices so we can check load order (in debug mode) - if (_stricmp(basename, MIMALLOC_NAME) == 0) mimalloc_index = i; - if (_stricmp(basename, UCRTBASE_NAME) == 0) ucrtbase_index = i; - if (_stricmp(basename, UCRTBASED_NAME) == 0) ucrtbase_index = i; - - // see if we potentially patch in this module - int priority = 0; - if (i == 0) priority = 2; // main module to allow static crt linking - else if (_strnicmp(basename, "ucrt", 4) == 0) priority = 3; // new ucrtbase.dll in windows 10 - // NOTE: don't override msvcr -- leads to crashes in setlocale (needs more testing) - // else if (_strnicmp(basename, "msvcr", 5) == 0) priority = 1; // older runtimes - - if (priority > 0) { - // probably found a crt module, try to patch it - mi_module_resolve(basename,mod,priority); - - // try to find the atexit functions for the main process (in `ucrtbase.dll`) - if (crt_atexit==NULL) crt_atexit = (atexit_fun_t*)GetProcAddress(mod, "_crt_atexit"); - if (crt_at_quick_exit == NULL) crt_at_quick_exit = (atexit_fun_t*)GetProcAddress(mod, "_crt_at_quick_exit"); - } - } - } - int diff = mimalloc_index - ucrtbase_index; - if (diff > 1) { - _mi_warning_message("warning: the \"mimalloc-override\" DLL seems not to load before or right after the C runtime (\"ucrtbase\").\n" - " Try to fix this by changing the linking order.\n"); - } - return true; -} - - -// ------------------------------------------------------ -// Dll Entry -// ------------------------------------------------------ - -extern BOOL WINAPI _DllMainCRTStartup(HINSTANCE inst, DWORD reason, LPVOID reserved); - -static DWORD mi_fls_unwind_entry; -static void NTAPI mi_fls_unwind(PVOID value) { - if (value != NULL) mi_patches_enable(); // and re-enable normal patches again for DLL's loaded after us - return; -} - -static void mi_patches_atexit(void) { - mi_execute_exit_list(&atexit_list); - mi_patches_enable_term(); // enter 
termination phase and patch realloc/free with a no-op -} - -static void mi_patches_at_quick_exit(void) { - mi_execute_exit_list(&at_quick_exit_list); - mi_patches_enable_term(); // enter termination phase and patch realloc/free with a no-op -} - -BOOL WINAPI DllEntry(HINSTANCE inst, DWORD reason, LPVOID reserved) { - if (reason == DLL_PROCESS_ATTACH) { - __security_init_cookie(); - } - else if (reason == DLL_PROCESS_DETACH) { - // enter termination phase for good now - mi_patches_enable_term(); - } - // C runtime main - BOOL ok = _DllMainCRTStartup(inst, reason, reserved); - if (reason == DLL_PROCESS_ATTACH && ok) { - // initialize at exit lists - mi_initialize_atexit(); - - // Now resolve patches - ok = mi_patches_resolve(); - if (ok) { - // check if patching is not disabled - #pragma warning(suppress:4996) - const char* s = getenv("MIMALLOC_DISABLE_OVERRIDE"); - bool enabled = (s == NULL || !(strstr("1;TRUE;YES;ON", s) != NULL)); - if (!enabled) { - _mi_verbose_message("override is disabled\n"); - } - else { - // and register our unwind entry (this must be after resolving due to possible delayed DLL initialization from GetProcAddress) - mi_fls_unwind_entry = FlsAlloc(&mi_fls_unwind); - if (mi_fls_unwind_entry != FLS_OUT_OF_INDEXES) { - FlsSetValue(mi_fls_unwind_entry, (void*)1); - } - - // register our patch disabler in the global exit list - if (crt_atexit != NULL) (*crt_atexit)(&mi_patches_atexit); - if (crt_at_quick_exit != NULL) (*crt_at_quick_exit)(&mi_patches_at_quick_exit); - - // and patch ! 
this also redirects the `atexit` handling for the global exit list - mi_patches_enable(); - _mi_verbose_message("override is enabled\n"); - - // hide internal allocation - mi_stats_reset(); - } - } - } - return ok; -} From 08d83cc33dcb1338428821f94e50bd436ef5f656 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 17 Oct 2019 09:24:57 -0700 Subject: [PATCH 012/293] disallow regular allocation from the huge reserved area --- src/os.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/os.c b/src/os.c index 2ad0648f..e7313c80 100644 --- a/src/os.c +++ b/src/os.c @@ -456,6 +456,7 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo if (!commit) allow_large = false; void* p = NULL; + /* if (commit && allow_large) { p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); if (p != NULL) { @@ -463,6 +464,7 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo return p; } } + */ #if defined(_WIN32) int flags = MEM_RESERVE; From 4609537b8ae05f135d51c5d608398df303ae7dc6 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 17 Oct 2019 09:47:12 -0700 Subject: [PATCH 013/293] pick better umul_overflow variant based on intptr size --- include/mimalloc-internal.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e99e6df6..c4f85ca4 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -156,10 +156,13 @@ bool _mi_page_is_valid(mi_page_t* page); #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { #if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 -#if (MI_INTPTR_SIZE == 4) +#include <limits.h> // INT_MAX, LONG_MAX +#if (INTPTR_MAX == INT_MAX) return __builtin_umul_overflow(count, size, total); -#else +#elif (INTPTR_MAX == LONG_MAX) return __builtin_umull_overflow(count, size, total); +#else + return 
__builtin_umulll_overflow(count, size, total); #endif #else /* __builtin_umul_overflow is unavailable */ *total = count * size; From f3a162f09527922aa9c6fdf333cd3a87aafd5682 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 17 Oct 2019 09:52:10 -0700 Subject: [PATCH 014/293] pick better umul_overflow variant based on size_t size --- include/mimalloc-internal.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index c4f85ca4..1a5b639d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -156,10 +156,10 @@ bool _mi_page_is_valid(mi_page_t* page); #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { #if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 -#include <limits.h> // INT_MAX, LONG_MAX -#if (INTPTR_MAX == INT_MAX) +#include <limits.h> // UINT_MAX, ULONG_MAX +#if (SIZE_MAX == UINT_MAX) return __builtin_umul_overflow(count, size, total); -#elif (INTPTR_MAX == LONG_MAX) +#elif (SIZE_MAX == ULONG_MAX) return __builtin_umull_overflow(count, size, total); #else return __builtin_umulll_overflow(count, size, total); From e747a6f3a6283699ee7b51a5dc1ce9b396a1106c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 17 Oct 2019 17:01:56 +0200 Subject: [PATCH 015/293] Use `unsigned` for bit-field variables It is actually non-standard to use `bool` with a bit-field quantifier, and VS 2019 complains about this. 
Signed-off-by: Johannes Schindelin --- include/mimalloc-types.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 72fb7e7e..4d0ade1b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -136,8 +136,8 @@ typedef union mi_page_flags_u { uint16_t value; uint8_t full_aligned; struct { - bool in_full:1; - bool has_aligned:1; + unsigned in_full:1; + unsigned has_aligned:1; bool is_zero; // `true` if the blocks in the free list are zero initialized }; } mi_page_flags_t; @@ -167,10 +167,10 @@ typedef uintptr_t mi_thread_free_t; typedef struct mi_page_s { // "owned" by the segment uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]` - bool segment_in_use:1; // `true` if the segment allocated this page - bool is_reset:1; // `true` if the page memory was reset - bool is_committed:1; // `true` if the page virtual memory is committed - bool is_zero_init:1; // `true` if the page was zero initialized + unsigned segment_in_use:1; // `true` if the segment allocated this page + unsigned is_reset:1; // `true` if the page memory was reset + unsigned is_committed:1; // `true` if the page virtual memory is committed + unsigned is_zero_init:1; // `true` if the page was zero initialized // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` From 5bd8ea2e4feaa3e6ce8aa96b9c32994182aa812d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 17 Oct 2019 17:02:51 +0200 Subject: [PATCH 016/293] Repeat `mi_decl_allocator` in functions' definitions Quite a few functions are declared with that attribute, and VS 2019 complains if the definition does not repeat it. 
Signed-off-by: Johannes Schindelin --- src/alloc-aligned.c | 48 ++++++++++++++++++++++----------------------- src/alloc.c | 44 ++++++++++++++++++++--------------------- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 352f07b2..99347933 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -61,53 +61,53 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t } -void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false); } -void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_malloc_aligned_at(heap, size, alignment, 0); } -void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, true); } -void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_zalloc_aligned_at(heap, size, alignment, 0); } -void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; if (mi_mul_overflow(count, size, &total)) 
return NULL; return mi_heap_zalloc_aligned_at(heap, total, alignment, offset); } -void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_calloc_aligned_at(heap,count,size,alignment,0); } -void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_allocator void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_aligned_at(mi_get_default_heap(), size, alignment, offset); } -void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_allocator void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_malloc_aligned(mi_get_default_heap(), size, alignment); } -void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_allocator void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_zalloc_aligned_at(mi_get_default_heap(), size, alignment, offset); } -void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_allocator void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_zalloc_aligned(mi_get_default_heap(), size, alignment); } -void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_allocator void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_calloc_aligned_at(mi_get_default_heap(), count, size, alignment, offset); } -void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_allocator void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { return 
mi_heap_calloc_aligned(mi_get_default_heap(), count, size, alignment); } @@ -150,55 +150,55 @@ static void* mi_heap_realloc_zero_aligned(mi_heap_t* heap, void* p, size_t newsi return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,zero); } -void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,false); } -void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_zero_aligned(heap,p,newsize,alignment,false); } -void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_zero_aligned_at(heap, p, newsize, alignment, offset, true); } -void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_zero_aligned(heap, p, newsize, alignment, true); } -void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; if (mi_mul_overflow(newcount, size, &total)) return NULL; return 
mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset); } -void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { size_t total; if (mi_mul_overflow(newcount, size, &total)) return NULL; return mi_heap_rezalloc_aligned(heap, p, total, alignment); } -void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_allocator void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); } -void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_aligned(mi_get_default_heap(), p, newsize, alignment); } -void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_allocator void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_rezalloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); } -void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +mi_decl_allocator void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_rezalloc_aligned(mi_get_default_heap(), p, newsize, alignment); } -void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_allocator void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_recalloc_aligned_at(mi_get_default_heap(), p, newcount, 
size, alignment, offset); } -void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_allocator void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_recalloc_aligned(mi_get_default_heap(), p, newcount, size, alignment); } diff --git a/src/alloc.c b/src/alloc.c index 9d50bf9f..3950496a 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -47,26 +47,26 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz } // allocate a small block -extern inline void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { +extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { mi_assert(size <= MI_SMALL_SIZE_MAX); mi_page_t* page = _mi_heap_get_free_small_page(heap,size); return _mi_page_malloc(heap, page, size); } -extern inline void* mi_malloc_small(size_t size) mi_attr_noexcept { +extern inline mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept { return mi_heap_malloc_small(mi_get_default_heap(), size); } // zero initialized small block -void* mi_zalloc_small(size_t size) mi_attr_noexcept { +mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept { void* p = mi_malloc_small(size); if (p != NULL) { memset(p, 0, size); } return p; } // The main allocation function -extern inline void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { +extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { mi_assert(heap!=NULL); mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local void* p; @@ -85,7 +85,7 @@ extern inline void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcep return p; } -extern inline void* mi_malloc(size_t size) mi_attr_noexcept { +extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept { return 
mi_heap_malloc(mi_get_default_heap(), size); } @@ -115,11 +115,11 @@ void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) { return p; } -extern inline void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { +extern inline mi_decl_allocator void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { return _mi_heap_malloc_zero(heap, size, true); } -void* mi_zalloc(size_t size) mi_attr_noexcept { +mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept { return mi_heap_zalloc(mi_get_default_heap(),size); } @@ -360,29 +360,29 @@ void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { mi_free(p); } -extern inline void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { +extern inline mi_decl_allocator void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_mul_overflow(count,size,&total)) return NULL; return mi_heap_zalloc(heap,total); } -void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { +mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { return mi_heap_calloc(mi_get_default_heap(),count,size); } // Uninitialized `calloc` -extern void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { +extern mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_mul_overflow(count,size,&total)) return NULL; return mi_heap_malloc(heap, total); } -void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { +mi_decl_allocator void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { return mi_heap_mallocn(mi_get_default_heap(),count,size); } // Expand in place or fail -void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { +mi_decl_allocator void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { if (p == NULL) return NULL; size_t size = mi_usable_size(p); if (newsize > size) return NULL; @@ -408,11 
+408,11 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) return newp; } -void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { return _mi_heap_realloc_zero(heap, p, newsize, false); } -void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_mul_overflow(count, size, &total)) return NULL; return mi_heap_realloc(heap, p, total); @@ -420,41 +420,41 @@ void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_a // Reallocate but free `p` on errors -void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { void* newp = mi_heap_realloc(heap, p, newsize); if (newp==NULL && p!=NULL) mi_free(p); return newp; } -void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { return _mi_heap_realloc_zero(heap, p, newsize, true); } -void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { +mi_decl_allocator void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_mul_overflow(count, size, &total)) return NULL; return mi_heap_rezalloc(heap, p, total); } -void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { +mi_decl_allocator void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_realloc(mi_get_default_heap(),p,newsize); } -void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { +mi_decl_allocator void* mi_reallocn(void* p, size_t count, 
size_t size) mi_attr_noexcept { return mi_heap_reallocn(mi_get_default_heap(),p,count,size); } // Reallocate but free `p` on errors -void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { +mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_reallocf(mi_get_default_heap(),p,newsize); } -void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { +mi_decl_allocator void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_rezalloc(mi_get_default_heap(), p, newsize); } -void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { +mi_decl_allocator void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { return mi_heap_recalloc(mi_get_default_heap(), p, count, size); } From 0fd0122c0a478d75d08b434ee1e66f51331d3d69 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 16 Oct 2019 22:43:57 +0200 Subject: [PATCH 017/293] Avoid compiler warning when casting the result of `GetProcAddress()` It is most unfortunate that the return type of `GetProcAddress()` is `FARPROC` (which is essentially `intptr_t(*)(void)): this type cannot be cast by GCC without warnings to anything but the generic function pointer type `void(*)(void)`. Let's work around that. 
Signed-off-by: Johannes Schindelin --- src/os.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/os.c b/src/os.c index e7313c80..cc69123a 100644 --- a/src/os.c +++ b/src/os.c @@ -145,13 +145,13 @@ void _mi_os_init(void) { hDll = LoadLibrary(TEXT("kernelbase.dll")); if (hDll != NULL) { // use VirtualAlloc2FromApp if possible as it is available to Windows store apps - pVirtualAlloc2 = (PVirtualAlloc2)GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)GetProcAddress(hDll, "VirtualAlloc2"); + pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); + if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); FreeLibrary(hDll); } hDll = LoadLibrary(TEXT("ntdll.dll")); if (hDll != NULL) { - pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); + pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); FreeLibrary(hDll); } if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { From 559688ec6468c26b3831004301d727f0dce2437b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 16 Oct 2019 23:40:25 +0200 Subject: [PATCH 018/293] Suppress warning about unnamed struct This prevents MSVC complaining with warning C4201: nonstandard extension used: nameless struct/union The struct might seem unnecessary to the occasional reader (it did seem so to this commit's author), but it is not! It is required to align the fields to a boundary, which is verified by the test suite. Removing that "unnecessary" `struct` results in this failure: 1: Test command: mimalloc-test-api [...] 1: test: malloc-zero... 
mimalloc: assertion failed: at src/page.c:591, mi_page_init 1: assertion: "!mi_page_has_aligned(page)" Signed-off-by: Johannes Schindelin --- include/mimalloc-types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 4d0ade1b..c538d165 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -135,10 +135,11 @@ typedef enum mi_delayed_e { typedef union mi_page_flags_u { uint16_t value; uint8_t full_aligned; - struct { + struct { // force alignment unsigned in_full:1; unsigned has_aligned:1; bool is_zero; // `true` if the blocks in the free list are zero initialized +#pragma warning(suppress:4201) }; } mi_page_flags_t; From 26c27fbf587627caabc1e72c2b7d5a813a097464 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 17 Oct 2019 12:07:26 -0700 Subject: [PATCH 019/293] use uint8_t bit fields, and improve portability of page_flags type --- include/mimalloc-types.h | 25 +++++++++++++------------ src/init.c | 2 +- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index c538d165..c2df6340 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -132,15 +132,16 @@ typedef enum mi_delayed_e { // The `in_full` and `has_aligned` page flags are put in a union to efficiently // test if both are false (`value == 0`) in the `mi_free` routine. -typedef union mi_page_flags_u { - uint16_t value; - uint8_t full_aligned; - struct { // force alignment - unsigned in_full:1; - unsigned has_aligned:1; - bool is_zero; // `true` if the blocks in the free list are zero initialized -#pragma warning(suppress:4201) +typedef struct mi_page_flags_s { + #pragma warning(suppress:4201) + union { + uint8_t full_aligned; + struct { + uint8_t in_full : 1; + uint8_t has_aligned : 1; + }; }; + bool is_zero; // `true` if the blocks in the free list are zero initialized } mi_page_flags_t; // Thread free list. 
@@ -168,10 +169,10 @@ typedef uintptr_t mi_thread_free_t; typedef struct mi_page_s { // "owned" by the segment uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]` - unsigned segment_in_use:1; // `true` if the segment allocated this page - unsigned is_reset:1; // `true` if the page memory was reset - unsigned is_committed:1; // `true` if the page virtual memory is committed - unsigned is_zero_init:1; // `true` if the page was zero initialized + uint8_t segment_in_use:1; // `true` if the segment allocated this page + uint8_t is_reset:1; // `true` if the page memory was reset + uint8_t is_committed:1; // `true` if the page virtual memory is committed + uint8_t is_zero_init:1; // `true` if the page was zero initialized // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` diff --git a/src/init.c b/src/init.c index 5ab39c28..d62a2d34 100644 --- a/src/init.c +++ b/src/init.c @@ -13,7 +13,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { 0, false, false, false, false, 0, 0, - { 0 }, + { { 0 }, false }, NULL, // free #if MI_SECURE 0, From 6e94950de329f0817ad8853b658aa15dc09984ab Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 17 Oct 2019 12:13:45 -0700 Subject: [PATCH 020/293] update redirection modules --- bin/mimalloc-redirect.dll | Bin 46592 -> 46592 bytes bin/mimalloc-redirect32.dll | Bin 33792 -> 33792 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/bin/mimalloc-redirect.dll b/bin/mimalloc-redirect.dll index ec718f91c10dfe4c47b83739df2764fd25ad8581..a1daf316b56cb3c885dc48f1c5fd1ef0ffa302b9 100644 GIT binary patch delta 690 zcmZp8!_@GGX~GRA=Dizlu`x0p-TaWznTc`7W>uCCN{pG4#ncxt-kQ8!{UMN?siDuf zVe&~07sk%Xe46!4A4MkjYp!Orm~5@3!zeg8L#vIkW%GM2Zx+UY&DQ#FSs1r$)-*|G z75&M;z|ifY641$-#m&gjT%!`eP?9uxp`{L^!Q^w6T5P>Q`Pb_v^IF9+PMn-=)iU{m zRivqQ9s|P*os<9ndjPe1cy#-yD0p=9oOrSI1W>5cMMdNFG7q3p8VscZ9=*JxAl1Al zC)ZiavIYM9|Nq6Y$qTJ>8O0~_+sM{8WHT^;3p6E8k}gIJ*Ox&|m(0@r&Ds0^yN7_N67L~j&S?>4wz2cX^vh_ZU1#j_Baqv4v9 zAex<^nycWNS&R?7wg&l5fuU3nXpjL^i^Gc;pcejZ2Y~Jn^8p2^M|X${&?;Uzpo$mb zAoGBt3LeczEDoQ6fyr_FPLnsVuuLwnEoO9@{KQs;ZAunUx$I;#0m;n=?Sut^M)FL);%_jCnO}~1?}}JpBr-4oaXJvY zBm!|DNQ8lbdGBVW05wJdlQhxv#v6kE3*XEPeDr{6(IU!noZVU_osSFGcfb>dt i1_l8T8%PU!GB88{*$F_}-iv`DBXx6Qs1Vbl6?_2Ufdkk8 delta 702 zcmZp8!_@GGX~GSr`BOIDVq;{yviTvSGZW*9&8jRPlo%@~i>WVQd^LHy`a>W&Q$wHe zz~qw}E{roL^J&&IF^W#^*Ido$G1*#6hf#5IhE^Nnl+Ew8yjd6%He2hzWnnzBS<@t$ zRg{B~fuY+)C7_cvi<^<5xke>`p`>u~LQ5S+`^o1lwM6GIFfe!kg$=k$*^Lgo>;-aP z&!7CyGM2Gva*S2WC({{Qa*G|9uG+ebygqnqc%i=`)kLY*!u8n2gu zjL={x74YcgWd*7Jdt!2)wJe+C&;S2lY@0mMI+u}s@_%dD`hskrbwC3GUN8YQw;m`} z_vjU^h6;MTICUIoVy|cvSm4BqN8cb8D7>x#il)Hz9s??a>P?30?Sts`g6drc*Xsb( z>j6<#545-oq1hX*IS8WJ2&y>?u9?O7z-w!e?-UqH^?(K`K(#o$@BwP!-*y1#4ly54 z2zzvgr~s|vihTV>69Iw#;&qCA}}XJYpxpu sLqIA6!v`R})184q0K^8;ik=J%5kPhUkoNatV8}?_+!!jvv}gq%09^zIeEXT>FI!%u0W~f3lda6=TxmK;6{M 
z2Xr$;HtRUuV4P^cBG*4 Date: Thu, 17 Oct 2019 12:14:15 -0700 Subject: [PATCH 021/293] increase delayed output buffer to 32k --- src/options.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/options.c b/src/options.c index 3e10926c..413ea765 100644 --- a/src/options.c +++ b/src/options.c @@ -144,21 +144,23 @@ static void mi_out_stderr(const char* msg) { // function we also buffer output that happens earlier. When // an output function is registered it is called immediately with // the output up to that point. -#define MAX_OUT_BUF (8*1024) -static char out_buf[MAX_OUT_BUF+1]; +#ifndef MI_MAX_DELAY_OUTPUT +#define MI_MAX_DELAY_OUTPUT (32*1024) +#endif +static char out_buf[MI_MAX_DELAY_OUTPUT+1]; static _Atomic(uintptr_t) out_len; static void mi_out_buf(const char* msg) { if (msg==NULL) return; - if (mi_atomic_read_relaxed(&out_len)>=MAX_OUT_BUF) return; + if (mi_atomic_read_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; size_t n = strlen(msg); if (n==0) return; // claim space uintptr_t start = mi_atomic_addu(&out_len, n); - if (start >= MAX_OUT_BUF) return; + if (start >= MI_MAX_DELAY_OUTPUT) return; // check bound - if (start+n >= MAX_OUT_BUF) { - n = MAX_OUT_BUF-start-1; + if (start+n >= MI_MAX_DELAY_OUTPUT) { + n = MI_MAX_DELAY_OUTPUT-start-1; } memcpy(&out_buf[start], msg, n); } @@ -166,9 +168,9 @@ static void mi_out_buf(const char* msg) { static void mi_out_buf_flush(mi_output_fun* out) { if (out==NULL) return; // claim all (no more output will be added after this point) - size_t count = mi_atomic_addu(&out_len, MAX_OUT_BUF); + size_t count = mi_atomic_addu(&out_len, MI_MAX_DELAY_OUTPUT); // and output the current contents - if (count>MAX_OUT_BUF) count = MAX_OUT_BUF; + if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; out(out_buf); } From 93b4281b82768023563bc2eb4a1441d83f53efa4 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 17 Oct 2019 12:35:35 -0700 Subject: [PATCH 022/293] ensure 
randomized huge page start address in 1GiB aligned --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index cc69123a..c6ab4ab6 100644 --- a/src/os.c +++ b/src/os.c @@ -886,7 +886,7 @@ int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reser uint8_t* start = (uint8_t*)((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_reserve_huge_os_pages); - start = start + ((uintptr_t)MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB #endif // Allocate one page at the time but try to place them contiguously From 5de851a84d20835de2429ed60a6eeac3b0a8b6eb Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 17 Oct 2019 16:48:16 -0700 Subject: [PATCH 023/293] update page_flags to have more portable definition --- ide/vs2019/mimalloc.vcxproj | 2 +- include/mimalloc-internal.h | 8 ++++---- include/mimalloc-types.h | 21 +++++++++------------ src/alloc-aligned.c | 2 +- src/alloc.c | 6 +++--- src/init.c | 2 +- src/page.c | 8 ++++---- 7 files changed, 23 insertions(+), 26 deletions(-) diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 5658b536..56beeff9 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -111,7 +111,7 @@ - Level3 + Level4 Disabled true true diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 1a5b639d..4c47af94 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -345,19 +345,19 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) // Page flags //----------------------------------------------------------- static inline bool mi_page_is_in_full(const mi_page_t* page) { - return page->flags.in_full; + 
return page->flags.x.in_full; } static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) { - page->flags.in_full = in_full; + page->flags.x.in_full = in_full; } static inline bool mi_page_has_aligned(const mi_page_t* page) { - return page->flags.has_aligned; + return page->flags.x.has_aligned; } static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { - page->flags.has_aligned = has_aligned; + page->flags.x.has_aligned = has_aligned; } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index c2df6340..eea76a25 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -131,17 +131,13 @@ typedef enum mi_delayed_e { // The `in_full` and `has_aligned` page flags are put in a union to efficiently -// test if both are false (`value == 0`) in the `mi_free` routine. -typedef struct mi_page_flags_s { - #pragma warning(suppress:4201) - union { - uint8_t full_aligned; - struct { - uint8_t in_full : 1; - uint8_t has_aligned : 1; - }; - }; - bool is_zero; // `true` if the blocks in the free list are zero initialized +// test if both are false (`full_aligned == 0`) in the `mi_free` routine. +typedef union mi_page_flags_s { + uint8_t full_aligned; + struct { + uint8_t in_full : 1; + uint8_t has_aligned : 1; + } x; } mi_page_flags_t; // Thread free list. 
@@ -177,7 +173,8 @@ typedef struct mi_page_s { // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory - mi_page_flags_t flags; // `in_full` and `has_aligned` flags (16 bits) + mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) + bool is_zero; // `true` if the blocks in the free list are zero initialized mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #if MI_SECURE diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 99347933..5a59a63a 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -126,7 +126,7 @@ static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t ne if (newp != NULL) { if (zero && newsize > size) { const mi_page_t* page = _mi_ptr_page(newp); - if (page->flags.is_zero) { + if (page->is_zero) { // already zero initialized mi_assert_expensive(mi_mem_is_zero(newp,newsize)); } diff --git a/src/alloc.c b/src/alloc.c index 3950496a..0c399671 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -33,7 +33,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz page->used++; mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); #if (MI_DEBUG) - if (!page->flags.is_zero) { memset(block, MI_DEBUG_UNINIT, size); } + if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); } #elif (MI_SECURE) block->next = 0; #endif @@ -96,7 +96,7 @@ void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { mi_assert_internal(p != NULL); mi_assert_internal(size > 0 && page->block_size >= size); mi_assert_internal(_mi_ptr_page(p)==page); - if (page->flags.is_zero) { + if (page->is_zero) { // already zero initialized memory? 
((mi_block_t*)p)->next = 0; // clear the free list pointer mi_assert_expensive(mi_mem_is_zero(p,page->block_size)); @@ -147,7 +147,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_set_next(page, block, page->free); page->free = block; page->used--; - page->flags.is_zero = false; + page->is_zero = false; _mi_segment_page_free(page,true,&heap->tld->segments); } return; diff --git a/src/init.c b/src/init.c index d62a2d34..75836aca 100644 --- a/src/init.c +++ b/src/init.c @@ -13,7 +13,7 @@ terms of the MIT license. A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { 0, false, false, false, false, 0, 0, - { { 0 }, false }, + { 0 }, false, NULL, // free #if MI_SECURE 0, diff --git a/src/page.c b/src/page.c index 25e59977..77d98f11 100644 --- a/src/page.c +++ b/src/page.c @@ -192,7 +192,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // usual case page->free = page->local_free; page->local_free = NULL; - page->flags.is_zero = false; + page->is_zero = false; } else if (force) { // append -- only on shutdown (force) as this is a linear operation @@ -204,7 +204,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { mi_block_set_next(page, tail, page->free); page->free = page->local_free; page->local_free = NULL; - page->flags.is_zero = false; + page->is_zero = false; } } @@ -559,7 +559,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st // extension into zero initialized memory preserves the zero'd free list if (!page->is_zero_init) { - page->flags.is_zero = false; + page->is_zero = false; } mi_assert_expensive(mi_page_is_valid_init(page)); } @@ -579,7 +579,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi #if MI_SECURE page->cookie = _mi_heap_random(heap) | 1; #endif - page->flags.is_zero = page->is_zero_init; + page->is_zero = page->is_zero_init; 
mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); From fdfa6ed2602f2e6cbd6d5d466b24a7e447cc0e42 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 17 Oct 2019 16:56:57 -0700 Subject: [PATCH 024/293] fix warnings at high warning level in msvc --- ide/vs2019/mimalloc.vcxproj | 2 +- include/mimalloc-atomic.h | 2 +- src/memory.c | 2 +- src/options.c | 2 +- src/os.c | 2 +- src/page-queue.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 56beeff9..5658b536 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -111,7 +111,7 @@ - Level4 + Level3 Disabled true true diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 8b254d3e..dff0f011 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -130,7 +130,7 @@ static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add return (intptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); } static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { - return (expected == RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); + return (expected == (uintptr_t)RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); } static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { return mi_atomic_cas_strong(p,desired,expected); diff --git a/src/memory.c b/src/memory.c index 0ad582cd..f9c53782 100644 --- a/src/memory.c +++ b/src/memory.c @@ -71,7 +71,7 @@ bool _mi_os_is_huge_reserved(void* p); typedef uintptr_t mi_region_info_t; static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) { - return ((uintptr_t)start | ((is_large?1:0) << 1) | (is_committed?1:0)); + return 
((uintptr_t)start | ((uintptr_t)(is_large?1:0) << 1) | (is_committed?1:0)); } static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) { diff --git a/src/options.c b/src/options.c index 413ea765..4e2bdeaa 100644 --- a/src/options.c +++ b/src/options.c @@ -346,7 +346,7 @@ static void mi_option_init(mi_option_desc_t* desc) { size_t len = strlen(s); if (len >= sizeof(buf)) len = sizeof(buf) - 1; for (size_t i = 0; i < len; i++) { - buf[i] = toupper(s[i]); + buf[i] = (char)toupper(s[i]); } buf[len] = 0; if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) { diff --git a/src/os.c b/src/os.c index c6ab4ab6..ed938221 100644 --- a/src/os.c +++ b/src/os.c @@ -700,7 +700,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); #if 1 - if (p == start) { + if (p == start && start != NULL) { VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set } #endif diff --git a/src/page-queue.c b/src/page-queue.c index d613095f..4af70b50 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -57,7 +57,7 @@ static inline uint8_t mi_bsr32(uint32_t x); static inline uint8_t mi_bsr32(uint32_t x) { uint32_t idx; _BitScanReverse((DWORD*)&idx, x); - return idx; + return (uint8_t)idx; } #elif defined(__GNUC__) || defined(__clang__) static inline uint8_t mi_bsr32(uint32_t x) { From 2affdbbd2e6c26fba92c380375d2e1f7c8578ffe Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 18 Oct 2019 18:11:04 -0700 Subject: [PATCH 025/293] stronger secure mode when defining MI_SECURE=4: checks for double free, corrupted free list, and invalid pointer frees. 
Performance is impacted but not too much -- more perf testing is needed --- CMakeLists.txt | 2 +- ide/vs2019/mimalloc.vcxproj | 4 +-- include/mimalloc-internal.h | 45 ++++++++++++++++++++++++---------- include/mimalloc-types.h | 7 ++++-- src/alloc.c | 49 +++++++++++++++++++++++++++++++++++-- src/options.c | 8 ++++++ test/main-override-static.c | 20 +++++++++++++++ test/main-override.cpp | 6 +++++ 8 files changed, 121 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 443476f0..81cc339a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,7 +68,7 @@ endif() if(MI_SECURE MATCHES "ON") message(STATUS "Set secure build (MI_SECURE=ON)") - list(APPEND mi_defines MI_SECURE=2) + list(APPEND mi_defines MI_SECURE=3) endif() if(MI_SEE_ASM MATCHES "ON") diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 5658b536..28e96d71 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -111,12 +111,12 @@ - Level3 + Level2 Disabled true true ../../include - MI_DEBUG=3;%(PreprocessorDefinitions); + MI_DEBUG=1;%(PreprocessorDefinitions); CompileAsCpp false stdcpp17 diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 4c47af94..7bffb6ac 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -20,6 +20,18 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_trace_message(...) 
#endif +#if defined(_MSC_VER) +#define mi_decl_noinline __declspec(noinline) +#define mi_attr_noreturn +#elif defined(__GNUC__) || defined(__clang__) +#define mi_decl_noinline __attribute__((noinline)) +#define mi_attr_noreturn __attribute__((noreturn)) +#else +#define mi_decl_noinline +#define mi_attr_noreturn +#endif + + // "options.c" void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message); void _mi_fprintf(mi_output_fun* out, const char* fmt, ...); @@ -28,6 +40,7 @@ void _mi_warning_message(const char* fmt, ...); void _mi_verbose_message(const char* fmt, ...); void _mi_trace_message(const char* fmt, ...); void _mi_options_init(void); +void _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn; // "init.c" extern mi_stats_t _mi_stats_main; @@ -124,13 +137,6 @@ bool _mi_page_is_valid(mi_page_t* page); #define __has_builtin(x) 0 #endif -#if defined(_MSC_VER) -#define mi_decl_noinline __declspec(noinline) -#elif defined(__GNUC__) || defined(__clang__) -#define mi_decl_noinline __attribute__((noinline)) -#else -#define mi_decl_noinline -#endif /* ----------------------------------------------------------- @@ -365,8 +371,12 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { // Encoding/Decoding the free list next pointers // ------------------------------------------------------------------- -static inline mi_block_t* mi_block_nextx( uintptr_t cookie, mi_block_t* block ) { - #if MI_SECURE +static inline bool mi_is_in_same_segment(const void* p, const void* q) { + return (_mi_ptr_segment(p) == _mi_ptr_segment(q)); +} + +static inline mi_block_t* mi_block_nextx( uintptr_t cookie, const mi_block_t* block ) { + #if MI_SECURE return (mi_block_t*)(block->next ^ cookie); #else UNUSED(cookie); @@ -374,7 +384,7 @@ static inline mi_block_t* mi_block_nextx( uintptr_t cookie, mi_block_t* block ) #endif } -static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, mi_block_t* next) { +static inline void 
mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) { #if MI_SECURE block->next = (mi_encoded_t)next ^ cookie; #else @@ -383,16 +393,25 @@ static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, mi_bl #endif } -static inline mi_block_t* mi_block_next(mi_page_t* page, mi_block_t* block) { +static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { #if MI_SECURE - return mi_block_nextx(page->cookie,block); + mi_block_t* next = mi_block_nextx(page->cookie,block); + #if MI_SECURE >= 4 + // check if next is at least in our segment range + // TODO: it is better to check if it is actually inside our page but that is more expensive + // to calculate. Perhaps with a relative free list this becomes feasible? + if (next!=NULL && !mi_is_in_same_segment(block, next)) { + _mi_fatal_error("corrupted free list entry at %p: %zx\n", block, (uintptr_t)next); + } + #endif + return next; #else UNUSED(page); return mi_block_nextx(0, block); #endif } -static inline void mi_block_set_next(mi_page_t* page, mi_block_t* block, mi_block_t* next) { +static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { #if MI_SECURE mi_block_set_nextx(page->cookie,block,next); #else diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index eea76a25..00a83839 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -22,8 +22,11 @@ terms of the MIT license. A copy of the license can be found in the file // Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). 
// #define MI_STAT 1 -// Define MI_SECURE as 1 to encode free lists -// #define MI_SECURE 1 +// Define MI_SECURE to enable security mitigations +// #define MI_SECURE 1 // guard page around metadata +// #define MI_SECURE 2 // guard page around each mimalloc page +// #define MI_SECURE 3 // encode free lists +// #define MI_SECURE 4 // all security enabled (checks for double free, corrupted free list and invalid pointer free) #if !defined(MI_SECURE) #define MI_SECURE 0 diff --git a/src/alloc.c b/src/alloc.c index 0c399671..f5208a0a 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -124,10 +124,50 @@ mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept { } +// ------------------------------------------------------ +// Check for double free in secure mode +// ------------------------------------------------------ + +#if MI_SECURE>=4 +static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) { + while (list != NULL) { + if (elem==list) return true; + list = mi_block_next(page, list); + } + return false; +} + +static mi_decl_noinline void mi_free_check_blockx(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { + size_t psize; + uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); + if ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize)) { + // Suspicious: the decoded value is in the same page. 
+ // Walk the free lists to see if it is already freed + if (mi_list_contains(page, page->free, n) || + mi_list_contains(page, page->local_free, n) || + mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), n)) + { + _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); + } + } +} + +static inline void mi_free_check_block(const mi_page_t* page, const mi_block_t* block) { + mi_block_t* n = (mi_block_t*)(block->next ^ page->cookie); + if (n!=NULL && mi_is_in_same_segment(block, n)) { // quick check + // Suspicous: decoded value in block is in the same segment, maybe a double free? + mi_free_check_blockx(page, block, n); + } + return; +} +#endif + + // ------------------------------------------------------ // Free // ------------------------------------------------------ + // multi-threaded free static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { @@ -251,14 +291,16 @@ void mi_free(void* p) mi_attr_noexcept #if (MI_DEBUG>0) if (mi_unlikely(!mi_is_in_heap_region(p))) { - _mi_warning_message("possibly trying to mi_free a pointer that does not point to a valid heap region: 0x%p\n" + _mi_warning_message("possibly trying to free a pointer that does not point to a valid heap region: 0x%p\n" "(this may still be a valid very large allocation (over 64MiB))\n", p); if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) { _mi_warning_message("(yes, the previous pointer 0x%p was valid after all)\n", p); } } +#endif +#if (MI_DEBUG>0 || MI_SECURE>=4) if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { - _mi_error_message("trying to mi_free a pointer that does not point to a valid heap space: %p\n", p); + _mi_error_message("trying to free a pointer that does not point to a valid heap space: %p\n", p); return; } #endif @@ -278,6 +320,9 @@ void mi_free(void* p) mi_attr_noexcept if (mi_likely(tid == segment->thread_id && 
page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; + #if MI_SECURE>=4 + mi_free_check_block(page,block); + #endif mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; diff --git a/src/options.c b/src/options.c index 4e2bdeaa..e74d9eb5 100644 --- a/src/options.c +++ b/src/options.c @@ -285,6 +285,14 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, co } #endif +mi_attr_noreturn void _mi_fatal_error(const char* fmt, ...) { + va_list args; + va_start(args, fmt); + mi_vfprintf(NULL, "mimalloc: fatal: ", fmt, args); + va_end(args); + exit(99); +} + // -------------------------------------------------------- // Initialize options by checking the environment // -------------------------------------------------------- diff --git a/test/main-override-static.c b/test/main-override-static.c index 6ddf4f37..d8369389 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -2,12 +2,16 @@ #include #include #include +#include #include #include // redefines malloc etc. 
+static void double_free(); + int main() { mi_version(); + double_free(); void* p1 = malloc(78); void* p2 = malloc(24); free(p1); @@ -29,3 +33,19 @@ int main() { mi_stats_print(NULL); return 0; } + +static void double_free() { + void* p[256]; + uintptr_t buf[256]; + + p[0] = mi_malloc(622616); + p[1] = mi_malloc(655362); + p[2] = mi_malloc(786432); + mi_free(p[2]); + // [VULN] Double free + mi_free(p[2]); + p[3] = mi_malloc(786456); + // [BUG] Found overlap + // p[3]=0x429b2ea2000 (size=917504), p[1]=0x429b2e42000 (size=786432) + fprintf(stderr, "p3: %p-%p, p1: %p-%p, p2: %p\n", p[3], (uint8_t*)(p[3]) + 786456, p[1], (uint8_t*)(p[1]) + 655362, p[2]); +} diff --git a/test/main-override.cpp b/test/main-override.cpp index 4bc91ae8..ea940061 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -2,10 +2,13 @@ #include #include #include +#include #include #include +static void double_free(); + static void* p = malloc(8); void free_p() { @@ -24,6 +27,7 @@ public: int main() { //mi_stats_reset(); // ignore earlier allocations + double_free(); atexit(free_p); void* p1 = malloc(78); void* p2 = mi_malloc_aligned(16,24); @@ -66,3 +70,5 @@ public: }; static Static s = Static(); + + From 25246070aeb3dc5a8f602a6e34222284b18560b4 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 19 Oct 2019 08:34:18 -0700 Subject: [PATCH 026/293] fix double free check in secure = 4 mode; inline _mi_ptr_cookie --- include/mimalloc-internal.h | 5 ++++- src/alloc.c | 32 ++++++++++++++++++-------------- src/init.c | 4 ---- test/main-override-static.c | 26 +++++++++++++++++++++++--- 4 files changed, 45 insertions(+), 22 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 7bffb6ac..cf0252c6 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -46,7 +46,6 @@ void _mi_fatal_error(const char* fmt, ...) 
mi_attr_noreturn; extern mi_stats_t _mi_stats_main; extern const mi_page_t _mi_page_empty; bool _mi_is_main_thread(void); -uintptr_t _mi_ptr_cookie(const void* p); uintptr_t _mi_random_shuffle(uintptr_t x); uintptr_t _mi_random_init(uintptr_t seed /* can be zero */); bool _mi_preloading(); // true while the C runtime is not ready @@ -244,6 +243,10 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) { return (heap != &_mi_heap_empty); } +static inline uintptr_t _mi_ptr_cookie(const void* p) { + return ((uintptr_t)p ^ _mi_heap_main.cookie); +} + /* ----------------------------------------------------------- Pages ----------------------------------------------------------- */ diff --git a/src/alloc.c b/src/alloc.c index f5208a0a..916b1f32 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -137,28 +137,32 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons return false; } -static mi_decl_noinline void mi_free_check_blockx(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { +static mi_decl_noinline bool mi_check_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { size_t psize; uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); - if ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize)) { - // Suspicious: the decoded value is in the same page. + if (n == NULL || ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize))) { + // Suspicious: the decoded value is in the same page (or NULL). 
// Walk the free lists to see if it is already freed - if (mi_list_contains(page, page->free, n) || - mi_list_contains(page, page->local_free, n) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), n)) + if (mi_list_contains(page, page->free, block) || + mi_list_contains(page, page->local_free, block) || + mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) { _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); + return true; } } + return false; } -static inline void mi_free_check_block(const mi_page_t* page, const mi_block_t* block) { +static inline bool mi_check_double_free(const mi_page_t* page, const mi_block_t* block) { mi_block_t* n = (mi_block_t*)(block->next ^ page->cookie); - if (n!=NULL && mi_is_in_same_segment(block, n)) { // quick check - // Suspicous: decoded value in block is in the same segment, maybe a double free? - mi_free_check_blockx(page, block, n); - } - return; + if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check + (n==NULL || mi_is_in_same_segment(block, n))) + { + // Suspicous: decoded value in block is in the same segment (or NULL) -- maybe a double free? 
+ return mi_check_double_freex(page, block, n); + } + return false; } #endif @@ -320,8 +324,8 @@ void mi_free(void* p) mi_attr_noexcept if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - #if MI_SECURE>=4 - mi_free_check_block(page,block); + #if MI_SECURE>=4 + if (mi_check_double_free(page,block)) return; #endif mi_block_set_next(page, block, page->local_free); page->local_free = block; diff --git a/src/init.c b/src/init.c index 75836aca..6514ce53 100644 --- a/src/init.c +++ b/src/init.c @@ -184,10 +184,6 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { return x; } -uintptr_t _mi_ptr_cookie(const void* p) { - return ((uintptr_t)p ^ _mi_heap_main.cookie); -} - /* ----------------------------------------------------------- Initialization and freeing of the thread local heaps ----------------------------------------------------------- */ diff --git a/test/main-override-static.c b/test/main-override-static.c index d8369389..ed5048e0 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -7,11 +7,13 @@ #include #include // redefines malloc etc. 
-static void double_free(); +static void double_free1(); +static void double_free2(); int main() { mi_version(); - double_free(); + //double_free1(); + //double_free2(); void* p1 = malloc(78); void* p2 = malloc(24); free(p1); @@ -34,7 +36,7 @@ int main() { return 0; } -static void double_free() { +static void double_free1() { void* p[256]; uintptr_t buf[256]; @@ -49,3 +51,21 @@ static void double_free() { // p[3]=0x429b2ea2000 (size=917504), p[1]=0x429b2e42000 (size=786432) fprintf(stderr, "p3: %p-%p, p1: %p-%p, p2: %p\n", p[3], (uint8_t*)(p[3]) + 786456, p[1], (uint8_t*)(p[1]) + 655362, p[2]); } + +static void double_free2() { + void* p[256]; + uintptr_t buf[256]; + // [INFO] Command buffer: 0x327b2000 + // [INFO] Input size: 182 + p[0] = malloc(712352); + p[1] = malloc(786432); + free(p[0]); + // [VULN] Double free + free(p[0]); + p[2] = malloc(786440); + p[3] = malloc(917504); + p[4] = malloc(786440); + // [BUG] Found overlap + // p[4]=0x433f1402000 (size=917504), p[1]=0x433f14c2000 (size=786432) + fprintf(stderr, "p1: %p-%p, p2: %p-%p\n", p[4], (uint8_t*)(p[4]) + 917504, p[1], (uint8_t*)(p[1]) + 786432); +} From 5dfdc092b50612abddadc97bf609222bef2ab00f Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 28 Oct 2019 12:26:57 -0700 Subject: [PATCH 027/293] improve windows warning message --- src/os.c | 2 +- test/main-override.cpp | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/os.c b/src/os.c index ed938221..0cd3a1ab 100644 --- a/src/os.c +++ b/src/os.c @@ -283,7 +283,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, p = mi_win_virtual_allocx(addr, size, try_alignment, flags); } if (p == NULL) { - _mi_warning_message("unable to alloc mem error: err: %i size: 0x%x \n", GetLastError(), size); + _mi_warning_message("unable to allocate memory: error code: %i, addr: %p, size: 0x%x, large only: %d, allow_large: %d\n", GetLastError(), addr, size, large_only, allow_large); } return p; } diff --git 
a/test/main-override.cpp b/test/main-override.cpp index ea940061..92740c6e 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -7,8 +7,6 @@ #include #include -static void double_free(); - static void* p = malloc(8); void free_p() { From ff9f29660b38ad3dfb9b8a55d7b5fb1837a50c7f Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 28 Oct 2019 12:27:32 -0700 Subject: [PATCH 028/293] remove double_free call --- test/main-override.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test/main-override.cpp b/test/main-override.cpp index 92740c6e..e006ad27 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -25,7 +25,6 @@ public: int main() { //mi_stats_reset(); // ignore earlier allocations - double_free(); atexit(free_p); void* p1 = malloc(78); void* p2 = mi_malloc_aligned(16,24); From 28d4ec4c5ad39bc9459cd432c34f8d9234b51431 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 28 Oct 2019 13:14:14 -0700 Subject: [PATCH 029/293] fix statistics accounting of huge pages --- src/alloc.c | 10 +++++++++- src/page.c | 3 ++- test/test-stress.c | 7 ++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 916b1f32..f8dab24d 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -192,7 +192,15 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc page->free = block; page->used--; page->is_zero = false; - _mi_segment_page_free(page,true,&heap->tld->segments); + mi_assert(page->used == 0); + mi_tld_t* tld = heap->tld; + if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&tld->stats.giant, page->block_size); + } + else { + _mi_stat_decrease(&tld->stats.huge, page->block_size); + } + _mi_segment_page_free(page,true,&tld->segments); } return; } diff --git a/src/page.c b/src/page.c index 77d98f11..b71be522 100644 --- a/src/page.c +++ b/src/page.c @@ -370,6 +370,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_page_set_has_aligned(page, false); // account 
for huge pages here + // (note: no longer necessary as huge pages are always abandoned) if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { _mi_stat_decrease(&page->heap->tld->stats.giant, page->block_size); @@ -378,7 +379,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); } } - + // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) mi_segments_tld_t* segments_tld = &page->heap->tld->segments; diff --git a/test/test-stress.c b/test/test-stress.c index 354e2b07..bb428072 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -158,6 +158,7 @@ int main(int argc, char** argv) { //printf("(reserve huge: %i\n)", res); //bench_start_program(); + mi_stats_reset(); memset((void*)transfer, 0, TRANSFERS*sizeof(void*)); run_os_threads(THREADS); for (int i = 0; i < TRANSFERS; i++) { @@ -165,7 +166,6 @@ int main(int argc, char** argv) { } #ifndef NDEBUG mi_collect(false); - mi_collect(true); #endif mi_stats_print(NULL); //bench_end_program(); @@ -191,6 +191,11 @@ static void run_os_threads(size_t nthreads) { for (size_t i = 0; i < nthreads; i++) { WaitForSingleObject(thandles[i], INFINITE); } + for (size_t i = 0; i < nthreads; i++) { + CloseHandle(thandles[i]); + } + free(tids); + free(thandles); } static void* atomic_exchange_ptr(volatile void** p, void* newval) { From 081e2d1eb6d517f1949b3245d37f758e8a27ac3f Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 28 Oct 2019 13:43:42 -0700 Subject: [PATCH 030/293] fix statistics display --- include/mimalloc-types.h | 6 +++--- src/init.c | 4 ++-- src/os.c | 4 ++-- src/page.c | 2 +- src/stats.c | 36 ++++++++++++++++++++++++------------ 5 files changed, 32 insertions(+), 20 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 00a83839..2e5d481b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ 
-340,14 +340,14 @@ typedef struct mi_stats_s { mi_stat_count_t page_committed; mi_stat_count_t segments_abandoned; mi_stat_count_t pages_abandoned; - mi_stat_count_t pages_extended; - mi_stat_count_t mmap_calls; - mi_stat_count_t commit_calls; mi_stat_count_t threads; mi_stat_count_t huge; mi_stat_count_t giant; mi_stat_count_t malloc; mi_stat_count_t segments_cache; + mi_stat_counter_t pages_extended; + mi_stat_counter_t mmap_calls; + mi_stat_counter_t commit_calls; mi_stat_counter_t page_no_retire; mi_stat_counter_t searches; mi_stat_counter_t huge_count; diff --git a/src/init.c b/src/init.c index 6514ce53..d361de3a 100644 --- a/src/init.c +++ b/src/init.c @@ -64,8 +64,8 @@ const mi_page_t _mi_page_empty = { MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, \ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ MI_STAT_COUNT_END_NULL() diff --git a/src/os.c b/src/os.c index 0cd3a1ab..8f5afc5b 100644 --- a/src/os.c +++ b/src/os.c @@ -477,7 +477,7 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo int protect_flags = (commit ? 
(PROT_WRITE | PROT_READ) : PROT_NONE); p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); #endif - _mi_stat_increase(&stats->mmap_calls, 1); + mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); if (commit) { _mi_stat_increase(&stats->committed, size); } @@ -632,7 +632,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ int err = 0; if (commit) { _mi_stat_increase(&stats->committed, csize); - _mi_stat_increase(&stats->commit_calls, 1); + _mi_stat_counter_increase(&stats->commit_calls, 1); } else { _mi_stat_decrease(&stats->committed, csize); diff --git a/src/page.c b/src/page.c index b71be522..2a48c64b 100644 --- a/src/page.c +++ b/src/page.c @@ -531,7 +531,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st size_t page_size; _mi_page_start(_mi_page_segment(page), page, &page_size); - mi_stat_increase(stats->pages_extended, 1); + mi_stat_counter_increase(stats->pages_extended, 1); // calculate the extend count size_t extend = page->reserved - page->capacity; diff --git a/src/stats.c b/src/stats.c index 37a7bde4..50bd029d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -95,15 +95,17 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1); mi_stat_add(&stats->segments_abandoned, &src->segments_abandoned, 1); - mi_stat_add(&stats->mmap_calls, &src->mmap_calls, 1); - mi_stat_add(&stats->commit_calls, &src->commit_calls, 1); mi_stat_add(&stats->threads, &src->threads, 1); - mi_stat_add(&stats->pages_extended, &src->pages_extended, 1); mi_stat_add(&stats->malloc, &src->malloc, 1); mi_stat_add(&stats->segments_cache, &src->segments_cache, 1); mi_stat_add(&stats->huge, &src->huge, 1); mi_stat_add(&stats->giant, &src->giant, 1); + + mi_stat_counter_add(&stats->pages_extended, &src->pages_extended, 1); + 
mi_stat_counter_add(&stats->mmap_calls, &src->mmap_calls, 1); + mi_stat_counter_add(&stats->commit_calls, &src->commit_calls, 1); + mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); mi_stat_counter_add(&stats->searches, &src->searches, 1); mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1); @@ -121,6 +123,9 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { Display statistics ----------------------------------------------------------- */ +// unit > 0 : size in binary bytes +// unit == 0: count as decimal +// unit < 0 : count in binary static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const char* fmt) { char buf[32]; int len = 32; @@ -165,17 +170,24 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t _mi_fprintf(out, " ok\n"); } else if (unit<0) { - mi_print_amount(stat->peak, 1, out); - mi_print_amount(stat->allocated, 1, out); - mi_print_amount(stat->freed, 1, out); - mi_print_amount(-unit, 1, out); - mi_print_count((stat->allocated / -unit), 0, out); + mi_print_amount(stat->peak, -1, out); + mi_print_amount(stat->allocated, -1, out); + mi_print_amount(stat->freed, -1, out); + if (unit==-1) { + _mi_fprintf(out, "%22s", ""); + } + else { + mi_print_amount(-unit, 1, out); + mi_print_count((stat->allocated / -unit), 0, out); + } if (stat->allocated > stat->freed) _mi_fprintf(out, " not all freed!\n"); else _mi_fprintf(out, " ok\n"); } else { + mi_print_amount(stat->peak, 1, out); + mi_print_amount(stat->allocated, 1, out); _mi_fprintf(out, "\n"); } } @@ -247,11 +259,11 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_stat_print(&stats->segments_cache, "-cached", -1, out); mi_stat_print(&stats->pages, "pages", -1, out); mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out); - mi_stat_print(&stats->pages_extended, "-extended", 0, out); + mi_stat_counter_print(&stats->pages_extended, "-extended", out); 
mi_stat_counter_print(&stats->page_no_retire, "-noretire", out); - mi_stat_print(&stats->mmap_calls, "mmaps", 0, out); - mi_stat_print(&stats->commit_calls, "commits", 0, out); - mi_stat_print(&stats->threads, "threads", 0, out); + mi_stat_counter_print(&stats->mmap_calls, "mmaps", out); + mi_stat_counter_print(&stats->commit_calls, "commits", out); + mi_stat_print(&stats->threads, "threads", -1, out); mi_stat_counter_print_avg(&stats->searches, "searches", out); if (secs >= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs); From 87bdfbb9b6b0a869f9ce8e76fd4fa580bb816840 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 28 Oct 2019 14:12:06 -0700 Subject: [PATCH 031/293] use more conservative retire strategy --- src/page.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/page.c b/src/page.c index 2a48c64b..5a186727 100644 --- a/src/page.c +++ b/src/page.c @@ -407,16 +407,18 @@ void _mi_page_retire(mi_page_t* page) { // (or we end up retiring and re-allocating most of the time) // NOTE: refine this more: we should not retire if this // is the only page left with free blocks. It is not clear - // how to check this efficiently though... for now we just check - // if its neighbours are almost fully used. + // how to check this efficiently though... + // for now, we don't retire if it is the only page left of this size class. 
+ mi_page_queue_t* pq = mi_page_queue_of(page); if (mi_likely(page->block_size <= (MI_SMALL_SIZE_MAX/4))) { - if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) { + // if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) { + if (pq->last==page && pq->first==page) { mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); return; // dont't retire after all } } - _mi_page_free(page, mi_page_queue_of(page), false); + _mi_page_free(page, pq, false); } From b052d3b73129fee02e145b7c1b8b2153dd39af0d Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 28 Oct 2019 15:54:33 -0700 Subject: [PATCH 032/293] enable double free and heap corruption detection in debug mode --- include/mimalloc-internal.h | 23 +++++++++--------- include/mimalloc-types.h | 27 ++++++++++++++------- src/alloc.c | 47 +++++++++++++++++++++---------------- src/init.c | 4 ++-- src/options.c | 4 +++- src/page.c | 6 ++--- test/main-override-static.c | 42 +++++++++++++++++++++++++++++++-- 7 files changed, 104 insertions(+), 49 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cf0252c6..ccf12a06 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -379,7 +379,7 @@ static inline bool mi_is_in_same_segment(const void* p, const void* q) { } static inline mi_block_t* mi_block_nextx( uintptr_t cookie, const mi_block_t* block ) { - #if MI_SECURE + #ifdef MI_ENCODE_FREELIST return (mi_block_t*)(block->next ^ cookie); #else UNUSED(cookie); @@ -388,7 +388,7 @@ static inline mi_block_t* mi_block_nextx( uintptr_t cookie, const mi_block_t* bl } static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) { - #if MI_SECURE + #ifdef MI_ENCODE_FREELIST block->next = (mi_encoded_t)next ^ cookie; #else UNUSED(cookie); @@ -397,16 +397,15 @@ static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const } static inline mi_block_t* mi_block_next(const mi_page_t* page, const 
mi_block_t* block) { - #if MI_SECURE + #ifdef MI_ENCODE_FREELIST mi_block_t* next = mi_block_nextx(page->cookie,block); - #if MI_SECURE >= 4 - // check if next is at least in our segment range - // TODO: it is better to check if it is actually inside our page but that is more expensive - // to calculate. Perhaps with a relative free list this becomes feasible? - if (next!=NULL && !mi_is_in_same_segment(block, next)) { - _mi_fatal_error("corrupted free list entry at %p: %zx\n", block, (uintptr_t)next); - } - #endif + // check for free list corruption: is `next` at least in our segment range? + // TODO: it is better to check if it is actually inside our page but that is more expensive + // to calculate. Perhaps with a relative free list this becomes feasible? + if (next!=NULL && !mi_is_in_same_segment(block, next)) { + _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); + next = NULL; + } return next; #else UNUSED(page); @@ -415,7 +414,7 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* } static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { - #if MI_SECURE + #ifdef MI_ENCODE_FREELIST mi_block_set_nextx(page->cookie,block,next); #else UNUSED(page); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 2e5d481b..99b6b22b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -25,24 +25,30 @@ terms of the MIT license. 
A copy of the license can be found in the file // Define MI_SECURE to enable security mitigations // #define MI_SECURE 1 // guard page around metadata // #define MI_SECURE 2 // guard page around each mimalloc page -// #define MI_SECURE 3 // encode free lists -// #define MI_SECURE 4 // all security enabled (checks for double free, corrupted free list and invalid pointer free) +// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free) +// #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. #if !defined(MI_SECURE) #define MI_SECURE 0 #endif -// Define MI_DEBUG as 1 for basic assert checks and statistics -// set it to 2 to do internal asserts, -// and to 3 to do extensive invariant checking. +// Define MI_DEBUG for debug mode +// #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free. +// #define MI_DEBUG 2 // + internal assertion checks +// #define MI_DEBUG 3 // + extensive internal invariant checking #if !defined(MI_DEBUG) #if !defined(NDEBUG) || defined(_DEBUG) -#define MI_DEBUG 1 +#define MI_DEBUG 2 #else #define MI_DEBUG 0 #endif #endif +// Encoded free lists allow detection of corrupted free lists +// and can detect buffer overflows and double `free`s. +#if (MI_SECURE>=3 || MI_DEBUG>=1) +#define MI_ENCODE_FREELIST 1 +#endif // ------------------------------------------------------ // Platform specific values @@ -117,6 +123,8 @@ terms of the MIT license. A copy of the license can be found in the file #error "define more bins" #endif +// The free lists use encoded next fields +// (Only actually encodes when MI_ENCODED_FREELIST is defined.) 
typedef uintptr_t mi_encoded_t; // free lists contain blocks @@ -125,6 +133,7 @@ typedef struct mi_block_s { } mi_block_t; +// The delayed flags are used for efficient multi-threaded free-ing typedef enum mi_delayed_e { MI_NO_DELAYED_FREE = 0, MI_USE_DELAYED_FREE = 1, @@ -180,7 +189,7 @@ typedef struct mi_page_s { bool is_zero; // `true` if the blocks in the free list are zero initialized mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) - #if MI_SECURE + #ifdef MI_ENCODE_FREELIST uintptr_t cookie; // random cookie to encode the free lists #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) @@ -197,8 +206,8 @@ typedef struct mi_page_s { // improve page index calculation // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds one word - #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0) - void* padding[1]; // 12 words on 64-bit in secure mode, 12 words on 32-bit plain + #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) + void* padding[1]; // 12 words on 64-bit with cookie, 12 words on 32-bit plain #endif } mi_page_t; diff --git a/src/alloc.c b/src/alloc.c index f8dab24d..d2319f82 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -32,10 +32,10 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz page->free = mi_block_next(page,block); page->used++; mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); -#if (MI_DEBUG) +#if (MI_DEBUG!=0) if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); } -#elif (MI_SECURE) - block->next = 0; +#elif (MI_SECURE!=0) + block->next = 0; // don't leak internal data #endif #if (MI_STAT>1) if(size <= MI_LARGE_OBJ_SIZE_MAX) { @@ -125,10 +125,12 @@ mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept { // ------------------------------------------------------ -// Check for double free in secure 
mode +// Check for double free in secure and debug mode +// This is somewhat expensive so only enabled for secure mode 4 // ------------------------------------------------------ -#if MI_SECURE>=4 +#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0)) +// linear check if the free list contains a specific element static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) { while (list != NULL) { if (elem==list) return true; @@ -137,15 +139,15 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons return false; } -static mi_decl_noinline bool mi_check_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { +static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { size_t psize; uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); if (n == NULL || ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize))) { // Suspicious: the decoded value is in the same page (or NULL). 
- // Walk the free lists to see if it is already freed + // Walk the free lists to verify positively if it is already freed if (mi_list_contains(page, page->free, block) || - mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + mi_list_contains(page, page->local_free, block) || + mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) { _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); return true; @@ -154,16 +156,23 @@ static mi_decl_noinline bool mi_check_double_freex(const mi_page_t* page, const return false; } -static inline bool mi_check_double_free(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* n = (mi_block_t*)(block->next ^ page->cookie); - if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check - (n==NULL || mi_is_in_same_segment(block, n))) +static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + mi_block_t* n = mi_block_nextx(page->cookie, block); // pretend it is freed, and get the decoded first field + if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? + (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? { // Suspicous: decoded value in block is in the same segment (or NULL) -- maybe a double free? 
- return mi_check_double_freex(page, block, n); + // (continue in separate function to improve code generation) + return mi_check_is_double_freex(page, block, n); } return false; } +#else +static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + UNUSED(page); + UNUSED(block); + return false; +} #endif @@ -171,7 +180,6 @@ static inline bool mi_check_double_free(const mi_page_t* page, const mi_block_t* // Free // ------------------------------------------------------ - // multi-threaded free static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { @@ -258,6 +266,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block // and push it on the free list if (mi_likely(local)) { // owning thread can free a block directly + if (mi_check_is_double_free(page, block)) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; @@ -301,7 +310,7 @@ void mi_free(void* p) mi_attr_noexcept const mi_segment_t* const segment = _mi_ptr_segment(p); if (mi_unlikely(segment == NULL)) return; // checks for (p==NULL) -#if (MI_DEBUG>0) +#if (MI_DEBUG!=0) if (mi_unlikely(!mi_is_in_heap_region(p))) { _mi_warning_message("possibly trying to free a pointer that does not point to a valid heap region: 0x%p\n" "(this may still be a valid very large allocation (over 64MiB))\n", p); @@ -310,7 +319,7 @@ void mi_free(void* p) mi_attr_noexcept } } #endif -#if (MI_DEBUG>0 || MI_SECURE>=4) +#if (MI_DEBUG!=0 || MI_SECURE>=4) if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { _mi_error_message("trying to free a pointer that does not point to a valid heap space: %p\n", p); return; @@ -332,9 +341,7 @@ void mi_free(void* p) mi_attr_noexcept if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - #if 
MI_SECURE>=4 - if (mi_check_double_free(page,block)) return; - #endif + if (mi_check_is_double_free(page,block)) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; diff --git a/src/init.c b/src/init.c index d361de3a..e15d82eb 100644 --- a/src/init.c +++ b/src/init.c @@ -15,14 +15,14 @@ const mi_page_t _mi_page_empty = { 0, false, false, false, false, 0, 0, { 0 }, false, NULL, // free - #if MI_SECURE + #if MI_ENCODE_FREELIST 0, #endif 0, // used NULL, ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0) + #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) , { NULL } // padding #endif }; diff --git a/src/options.c b/src/options.c index e74d9eb5..d71e5d1c 100644 --- a/src/options.c +++ b/src/options.c @@ -290,7 +290,9 @@ mi_attr_noreturn void _mi_fatal_error(const char* fmt, ...) { va_start(args, fmt); mi_vfprintf(NULL, "mimalloc: fatal: ", fmt, args); va_end(args); - exit(99); + #if (MI_SECURE>=0) + abort(); + #endif } // -------------------------------------------------------- diff --git a/src/page.c b/src/page.c index 5a186727..c1d29d46 100644 --- a/src/page.c +++ b/src/page.c @@ -512,7 +512,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* page, size_t e ----------------------------------------------------------- */ #define MI_MAX_EXTEND_SIZE (4*1024) // heuristic, one OS page seems to work well. 
-#if MI_SECURE +#if (MI_SECURE>0) #define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many #else #define MI_MIN_EXTEND (1) @@ -579,7 +579,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi page->block_size = block_size; mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); - #if MI_SECURE + #ifdef MI_ENCODE_FREELIST page->cookie = _mi_heap_random(heap) | 1; #endif page->is_zero = page->is_zero_init; @@ -592,7 +592,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->next == NULL); mi_assert_internal(page->prev == NULL); mi_assert_internal(!mi_page_has_aligned(page)); - #if MI_SECURE + #if (MI_ENCODE_FREELIST) mi_assert_internal(page->cookie != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page)); diff --git a/test/main-override-static.c b/test/main-override-static.c index ed5048e0..19712411 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -9,11 +9,16 @@ static void double_free1(); static void double_free2(); +static void corrupt_free(); int main() { mi_version(); + + // detect double frees and heap corruption //double_free1(); //double_free2(); + //corrupt_free(); + void* p1 = malloc(78); void* p2 = malloc(24); free(p1); @@ -36,9 +41,13 @@ int main() { return 0; } + +// The double free samples come ArcHeap [1] by Insu Yun (issue #161) +// [1]: https://arxiv.org/pdf/1903.00503.pdf + static void double_free1() { void* p[256]; - uintptr_t buf[256]; + //uintptr_t buf[256]; p[0] = mi_malloc(622616); p[1] = mi_malloc(655362); @@ -54,7 +63,7 @@ static void double_free1() { static void double_free2() { void* p[256]; - uintptr_t buf[256]; + //uintptr_t buf[256]; // [INFO] Command buffer: 0x327b2000 // [INFO] Input size: 182 p[0] = malloc(712352); @@ -69,3 +78,32 @@ static void double_free2() { // p[4]=0x433f1402000 (size=917504), p[1]=0x433f14c2000 (size=786432) fprintf(stderr, "p1: 
%p-%p, p2: %p-%p\n", p[4], (uint8_t*)(p[4]) + 917504, p[1], (uint8_t*)(p[1]) + 786432); } + + +// Try to corrupt the heap through buffer overflow +#define N 256 +#define SZ 64 + +static void corrupt_free() { + void* p[N]; + // allocate + for (int i = 0; i < N; i++) { + p[i] = malloc(SZ); + } + // free some + for (int i = 0; i < N; i += (N/10)) { + free(p[i]); + p[i] = NULL; + } + // try to corrupt the free list + for (int i = 0; i < N; i++) { + if (p[i] != NULL) { + memset(p[i], 0, SZ+8); + } + } + // allocate more.. trying to trigger an allocation from a corrupted entry + // this may need many allocations to get there (if at all) + for (int i = 0; i < 4096; i++) { + malloc(SZ); + } +} \ No newline at end of file From 6cf16b1201fdfa47e18020b2166d49c5b97d2097 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 30 Oct 2019 14:32:28 -0700 Subject: [PATCH 033/293] fix reset error on windows when disabling eager commit option --- src/memory.c | 10 +++++++--- src/options.c | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/memory.c b/src/memory.c index f9c53782..dd03cf95 100644 --- a/src/memory.c +++ b/src/memory.c @@ -461,10 +461,14 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large if (!is_large) { if (mi_option_is_enabled(mi_option_segment_reset)) { - _mi_os_reset(p, size, stats); // - // _mi_os_decommit(p,size,stats); // if !is_eager_committed (and clear dirty bits) + if (!is_eager_committed && // cannot reset large pages + (mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead + mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments + { + _mi_os_reset(p, size, stats); + //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? 
+ } } - // else { _mi_os_reset(p,size,stats); } } if (!is_eager_committed) { // adjust commit statistics as we commit again when re-using the same slot diff --git a/src/options.c b/src/options.c index d71e5d1c..a49c46ed 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(cache_reset) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free + { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose }; From 4a4d74927ccd7c07e68d28ca372c6c30408ad92f Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 30 Oct 2019 14:53:21 -0700 Subject: [PATCH 034/293] protect against double-free in multi-threaded free list --- src/page.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/page.c b/src/page.c index c1d29d46..ab271309 100644 --- a/src/page.c +++ b/src/page.c @@ -161,14 +161,21 @@ static void _mi_page_thread_free_collect(mi_page_t* page) // return if the list is empty if (head == NULL) return; - // find the tail + // find the tail -- also to get a proper count (without data races) + uintptr_t max_count = page->capacity; // cannot collect more than capacity uintptr_t count = 1; mi_block_t* tail = head; mi_block_t* next; - while ((next = mi_block_next(page,tail)) != NULL) { + while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) { count++; tail = next; } + // if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free) + if (count > max_count) { + _mi_fatal_error("corrupted thread-free list\n"); + return; // the thread-free items cannot be 
freed + } + // and append the current local free list mi_block_set_next(page,tail, page->local_free); page->local_free = head; From 8725a88fba49b719078240c65561e7c68bb286c9 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 30 Oct 2019 15:22:40 -0700 Subject: [PATCH 035/293] fix assertion in debug secure mode --- src/page.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index ab271309..f7fad764 100644 --- a/src/page.c +++ b/src/page.c @@ -441,8 +441,10 @@ void _mi_page_retire(mi_page_t* page) { static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) { UNUSED(stats); + #if (MI_SECURE<=2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); + #endif mi_assert_internal(page->capacity + extend <= page->reserved); void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL); size_t bsize = page->block_size; @@ -532,10 +534,12 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* page, size_t e // extra test in malloc? or cache effects?) 
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* stats) { UNUSED(stats); + mi_assert_expensive(mi_page_is_valid_init(page)); + #if (MI_SECURE<=2) mi_assert(page->free == NULL); mi_assert(page->local_free == NULL); - mi_assert_expensive(mi_page_is_valid_init(page)); if (page->free != NULL) return; + #endif if (page->capacity >= page->reserved) return; size_t page_size; From d36d04b4a6e5ada99fa36447332e5d7d3b1d33be Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 15:35:10 -0700 Subject: [PATCH 036/293] add arena for huge page management --- ide/vs2019/mimalloc-override.vcxproj | 1 + ide/vs2019/mimalloc.vcxproj | 1 + include/mimalloc-internal.h | 1 + src/arena.c | 369 +++++++++++++++++++++++++++ src/memory.c | 80 ++++-- src/os.c | 4 +- 6 files changed, 435 insertions(+), 21 deletions(-) create mode 100644 src/arena.c diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 96a8924f..09fd37fb 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -231,6 +231,7 @@ + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 28e96d71..1fabff5e 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -217,6 +217,7 @@ + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index ccf12a06..2b881ac9 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -57,6 +57,7 @@ void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocat void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); + // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); diff --git a/src/arena.c b/src/arena.c new file mode 100644 index 00000000..5f33965a --- /dev/null +++ 
b/src/arena.c @@ -0,0 +1,369 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- + +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include // memset + +// os.c +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); +void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); +int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; + +/* ----------------------------------------------------------- + Arena allocation +----------------------------------------------------------- */ + +#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE +#define MI_ARENA_BLOCK_SIZE (4*MI_SEGMENT_ALIGN) // 16MiB +#define MI_MAX_ARENAS (64) + +// Block info: bit 0 contains the `in_use` bit, the upper bits the +// size in count of arena blocks. +typedef uintptr_t mi_block_info_t; + +// A memory arena descriptor +typedef struct mi_arena_s { + uint8_t* start; // the start of the memory area + size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + bool is_zero_init; // is the arena zero initialized? 
+ bool is_large; // large OS page allocated + _Atomic(uintptr_t) block_bottom; // optimization to start the search for free blocks + _Atomic(mi_block_info_t) blocks[1]; // `block_count` block info's +} mi_arena_t; + + +// The available arenas +static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; +static _Atomic(uintptr_t) mi_arena_count; // = 0 + + +/* ----------------------------------------------------------- + Arena allocations get a memory id where the lower 8 bits are + the arena index +1, and the upper bits the block index. +----------------------------------------------------------- */ + +// Use `0` as a special id for direct OS allocated memory. +#define MI_MEMID_OS 0 + +static size_t mi_memid_create(size_t arena_index, size_t block_index) { + mi_assert_internal(arena_index < 0xFE); + return ((block_index << 8) | ((arena_index+1) & 0xFF)); +} + +static void mi_memid_indices(size_t memid, size_t* arena_index, size_t* block_index) { + mi_assert_internal(memid != MI_MEMID_OS); + *arena_index = (memid & 0xFF) - 1; + *block_index = (memid >> 8); +} + +/* ----------------------------------------------------------- + Block info +----------------------------------------------------------- */ + +static bool mi_block_is_in_use(mi_block_info_t info) { + return ((info&1) != 0); +} + +static size_t mi_block_count(mi_block_info_t info) { + return (info>>1); +} + +static mi_block_info_t mi_block_info_create(size_t bcount, bool in_use) { + return (((mi_block_info_t)bcount << 1) | (in_use ? 1 : 0)); +} + + +/* ----------------------------------------------------------- + Thread safe allocation in an arena +----------------------------------------------------------- */ + +static void* mi_arena_allocx(mi_arena_t* arena, size_t start_idx, size_t end_idx, size_t needed_bcount, bool* is_zero, size_t* block_index) +{ + // Scan linearly through all block info's + // Skipping used ranges, coalescing free ranges on demand. 
+ mi_assert_internal(needed_bcount > 0); + mi_assert_internal(start_idx <= arena->block_count); + mi_assert_internal(end_idx <= arena->block_count); + _Atomic(mi_block_info_t)* block = &arena->blocks[start_idx]; + _Atomic(mi_block_info_t)* end = &arena->blocks[end_idx]; + while (block < end) { + mi_block_info_t binfo = mi_atomic_read_relaxed(block); + size_t bcount = mi_block_count(binfo); + if (mi_block_is_in_use(binfo)) { + // in-use, skip ahead + mi_assert_internal(bcount > 0); + block += bcount; + } + else { + // free blocks + if (bcount==0) { + // optimization: + // use 0 initialized blocks at the end, to use single atomic operation + // initially to reduce contention (as we don't need to split) + if (block + needed_bcount > end) { + return NULL; // does not fit + } + else if (!mi_atomic_cas_weak(block, mi_block_info_create(needed_bcount, true), binfo)) { + // ouch, someone else was quicker. Try again.. + continue; + } + else { + // we got it: return a pointer to the claimed memory + ptrdiff_t idx = (block - arena->blocks); + *is_zero = arena->is_zero_init; + *block_index = idx; + return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); + } + } + + mi_assert_internal(bcount>0); + if (needed_bcount > bcount) { +#if 0 // MI_NO_ARENA_COALESCE + block += bcount; // too small, skip to the next range + continue; +#else + // too small, try to coalesce + _Atomic(mi_block_info_t)* block_next = block + bcount; + if (block_next >= end) { + return NULL; // does not fit + } + mi_block_info_t binfo_next = mi_atomic_read(block_next); + size_t bcount_next = mi_block_count(binfo_next); + if (mi_block_is_in_use(binfo_next)) { + // next block is in use, cannot coalesce + block += (bcount + bcount_next); // skip ahea over both blocks + } + else { + // next block is free, try to coalesce + // first set the next one to being used to prevent dangling ranges + if (!mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, true), binfo_next)) { + // someone else got in before us.. 
try again + continue; + } + else { + if (!mi_atomic_cas_strong(block, mi_block_info_create(bcount + bcount_next, true), binfo)) { // use strong to increase success chance + // someone claimed/coalesced the block in the meantime + // first free the next block again.. + bool ok = mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, false), binfo_next); // must be strong + mi_assert(ok); UNUSED(ok); + // and try again + continue; + } + else { + // coalesced! try again + // todo: we could optimize here to immediately claim the block if the + // coalesced size is a fit instead of retrying. Keep it simple for now. + continue; + } + } + } +#endif + } + else { // needed_bcount <= bcount + mi_assert_internal(needed_bcount <= bcount); + // it fits, claim the whole block + if (!mi_atomic_cas_weak(block, mi_block_info_create(bcount, true), binfo)) { + // ouch, someone else was quicker. Try again.. + continue; + } + else { + // got it, now split off the needed part + if (needed_bcount < bcount) { + mi_atomic_write(block + needed_bcount, mi_block_info_create(bcount - needed_bcount, false)); + mi_atomic_write(block, mi_block_info_create(needed_bcount, true)); + } + // return a pointer to the claimed memory + ptrdiff_t idx = (block - arena->blocks); + *is_zero = false; + *block_index = idx; + return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); + } + } + } + } + // no success + return NULL; +} + +// Try to reduce search time by starting from bottom and wrap around. 
+static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_zero, size_t* block_index) +{ + uintptr_t bottom = mi_atomic_read_relaxed(&arena->block_bottom); + void* p = mi_arena_allocx(arena, bottom, arena->block_count, needed_bcount, is_zero, block_index); + if (p == NULL && bottom > 0) { + // try again from the start + p = mi_arena_allocx(arena, 0, bottom, needed_bcount, is_zero, block_index); + } + if (p != NULL) { + mi_atomic_write(&arena->block_bottom, *block_index); + } + return p; +} + +/* ----------------------------------------------------------- + Arena Allocation +----------------------------------------------------------- */ + +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { + mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(size > 0); + *memid = MI_MEMID_OS; + *is_zero = false; + bool default_large = false; + if (large==NULL) large = &default_large; // ensure `large != NULL` + + // try to allocate in an arena if the alignment is small enough + // and if there is not too much waste around the `MI_ARENA_BLOCK_SIZE`. + if (alignment <= MI_SEGMENT_ALIGN && + size >= 3*(MI_ARENA_BLOCK_SIZE/4) && // > 12MiB (not more than 25% waste) + !(size > MI_ARENA_BLOCK_SIZE && size < 3*(MI_ARENA_BLOCK_SIZE/2)) // ! 
<16MiB - 24MiB> + ) + { + size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); + size_t bcount = asize / MI_ARENA_BLOCK_SIZE; + + mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + for (size_t i = 0; i < MI_MAX_ARENAS; i++) { + mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); + if (arena==NULL) break; + if (*large || !arena->is_large) { // large OS pages allowed, or arena is not large OS pages + size_t block_index = SIZE_MAX; + void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index); + if (p != NULL) { + mi_assert_internal(block_index != SIZE_MAX); + #if MI_DEBUG>=1 + _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; + mi_block_info_t binfo = mi_atomic_read(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); + #endif + *memid = mi_memid_create(i, block_index); + *commit = true; // TODO: support commit on demand? + *large = arena->is_large; + mi_assert_internal((uintptr_t)p % alignment == 0); + return p; + } + } + } + } + + // fall back to the OS + *is_zero = true; + *memid = MI_MEMID_OS; + return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); +} + +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_zero, memid, tld); +} + +/* ----------------------------------------------------------- + Arena free +----------------------------------------------------------- */ + +void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { + mi_assert_internal(size > 0 && stats != NULL); + if (p==NULL) return; + if (size==0) return; + if (memid == MI_MEMID_OS) { + // was a direct OS allocation, pass through + _mi_os_free(p, size, stats); + } + else { + // allocated in an arena + size_t arena_idx; + size_t block_idx; + mi_memid_indices(memid, &arena_idx, 
&block_idx); + mi_assert_internal(arena_idx < MI_MAX_ARENAS); + mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); + mi_assert_internal(arena != NULL); + if (arena == NULL) { + _mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + mi_assert_internal(arena->block_count > block_idx); + if (arena->block_count <= block_idx) { + _mi_fatal_error("trying to free from non-existent block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + _Atomic(mi_block_info_t)* block = &arena->blocks[block_idx]; + mi_block_info_t binfo = mi_atomic_read_relaxed(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); + if (!mi_block_is_in_use(binfo)) { + _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); + return; + }; + bool ok = mi_atomic_cas_strong(block, mi_block_info_create(mi_block_count(binfo), false), binfo); + mi_assert_internal(ok); + if (!ok) { + _mi_warning_message("unable to free arena block: %p, info 0x%zx", p, binfo); + } + if (block_idx < mi_atomic_read_relaxed(&arena->block_bottom)) { + mi_atomic_write(&arena->block_bottom, block_idx); + } + } +} + +/* ----------------------------------------------------------- + Add an arena. 
+----------------------------------------------------------- */ + +static bool mi_arena_add(mi_arena_t* arena) { + mi_assert_internal(arena != NULL); + mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); + mi_assert_internal(arena->block_count > 0); + mi_assert_internal(mi_mem_is_zero(arena->blocks,arena->block_count*sizeof(mi_block_info_t))); + + uintptr_t i = mi_atomic_addu(&mi_arena_count,1); + if (i >= MI_MAX_ARENAS) { + mi_atomic_subu(&mi_arena_count, 1); + return false; + } + mi_atomic_write_ptr(mi_atomic_cast(void*,&mi_arenas[i]), arena); + return true; +} + + +/* ----------------------------------------------------------- + Reserve a huge page arena. + TODO: improve OS api to just reserve and claim a huge + page area at once, (and return the total size). +----------------------------------------------------------- */ + +#include + +int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + size_t pages_reserved_default = 0; + if (pages_reserved==NULL) pages_reserved = &pages_reserved_default; + int err = _mi_os_reserve_huge_os_pages(pages, max_secs, pages_reserved); + if (*pages_reserved==0) return err; + size_t hsize = (*pages_reserved) * GiB; + void* p = _mi_os_try_alloc_from_huge_reserved(hsize, MI_SEGMENT_ALIGN); + mi_assert_internal(p != NULL); + if (p == NULL) return ENOMEM; + size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; + size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_heap_default->tld->stats); + if (arena == NULL) return ENOMEM; + arena->block_count = bcount; + arena->start = (uint8_t*)p; + arena->block_bottom = 0; + arena->is_large = true; + arena->is_zero_init = true; + memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t)); + //mi_atomic_write(&arena->blocks[0], mi_block_info_create(bcount, false)); + mi_arena_add(arena); + return 0; +} diff --git a/src/memory.c b/src/memory.c index 
dd03cf95..9ab7c850 100644 --- a/src/memory.c +++ b/src/memory.c @@ -50,6 +50,12 @@ void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* sta void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); bool _mi_os_is_huge_reserved(void* p); +// arena.c +void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); + + // Constants #if (MI_INTPTR_SIZE==8) #define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30)) // 256GiB => 16KiB for the region map @@ -87,6 +93,7 @@ typedef struct mem_region_s { volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd + size_t arena_memid; } mem_region_t; @@ -131,6 +138,30 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { } +static size_t mi_memid_create(size_t idx, size_t bitidx) { + return ((idx*MI_REGION_MAP_BITS) + bitidx)<<1; +} + +static size_t mi_memid_create_from_arena(size_t arena_memid) { + return (arena_memid << 1) | 1; +} + +static bool mi_memid_is_arena(size_t id) { + return ((id&1)==1); +} + +static bool mi_memid_indices(size_t id, size_t* idx, size_t* bitidx, size_t* arena_memid) { + if (mi_memid_is_arena(id)) { + *arena_memid = (id>>1); + return true; + } + else { + *idx = ((id>>1) / MI_REGION_MAP_BITS); + *bitidx = ((id>>1) % MI_REGION_MAP_BITS); + return false; + } +} + /* ---------------------------------------------------------------------------- Commit from a region -----------------------------------------------------------------------------*/ @@ -153,6 +184,9 @@ static bool mi_region_commit_blocks(mem_region_t* region, 
size_t idx, size_t bit { bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); bool region_large = *allow_large; + size_t arena_memid = 0; + void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, is_zero, &arena_memid, tld); + /* void* start = NULL; if (region_large) { start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN); @@ -161,6 +195,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit if (start == NULL) { start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, ®ion_large, tld); } + */ mi_assert_internal(!(region_large && !*allow_large)); if (start == NULL) { @@ -176,6 +211,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit info = mi_region_info_create(start,region_large,region_commit); if (mi_atomic_cas_strong(®ion->info, info, 0)) { // update the region count + region->arena_memid = arena_memid; mi_atomic_increment(®ions_count); } else { @@ -183,6 +219,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit // we assign it to a later slot instead (up to 4 tries). 
for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { + regions[idx+i].arena_memid = arena_memid; mi_atomic_increment(®ions_count); start = NULL; break; @@ -190,7 +227,8 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit } if (start != NULL) { // free it if we didn't succeed to save it to some other region - _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); + // _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); } // and continue with the memory at our index info = mi_atomic_read(®ion->info); @@ -229,7 +267,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit mi_assert_internal(blocks_start != NULL); *allow_large = region_is_large; *p = blocks_start; - *id = (idx*MI_REGION_MAP_BITS) + bitidx; + *id = mi_memid_create(idx, bitidx); return true; } @@ -269,7 +307,7 @@ static inline size_t mi_bsr(uintptr_t x) { // Allocate `blocks` in a `region` at `idx` of a given `size`. // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. +// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). 
static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) @@ -366,15 +404,17 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l { mi_assert_internal(id != NULL && tld != NULL); mi_assert_internal(size > 0); - *id = SIZE_MAX; + *id = 0; *is_zero = false; bool default_large = false; if (large==NULL) large = &default_large; // ensure `large != NULL` - // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`) + // use direct OS allocation for huge blocks or alignment if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) { - *is_zero = true; - return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, *commit, large, tld); // round up size + size_t arena_memid = 0; + void* p = _mi_arena_alloc_aligned(mi_good_commit_size(size), alignment, commit, large, is_zero, &arena_memid, tld); // round up size + *id = mi_memid_create_from_arena(arena_memid); + return p; } // always round size to OS page size multiple (so commit/decommit go over the entire range) @@ -405,9 +445,10 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l if (p == NULL) { // we could not find a place to allocate, fall back to the os directly - _mi_warning_message("unable to allocate from region: size %zu\n", size); - *is_zero = true; - p = _mi_os_alloc_aligned(size, alignment, commit, large, tld); + _mi_warning_message("unable to allocate from region: size %zu\n", size); + size_t arena_memid = 0; + p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld); + *id = mi_memid_create_from_arena(arena_memid); } else { tld->region_idx = idx; // next start of search? 
currently not used as we use first-fit @@ -428,18 +469,19 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { mi_assert_internal(size > 0 && stats != NULL); if (p==NULL) return; if (size==0) return; - if (id == SIZE_MAX) { - // was a direct OS allocation, pass through - _mi_os_free(p, size, stats); + size_t arena_memid = 0; + size_t idx = 0; + size_t bitidx = 0; + if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) { + // was a direct arena allocation, pass through + _mi_arena_free(p, size, arena_memid, stats); } else { // allocated in a region mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return; // we can align the size up to page size (as we allocate that way too) // this ensures we fully commit/decommit/reset - size = _mi_align_up(size, _mi_os_page_size()); - size_t idx = (id / MI_REGION_MAP_BITS); - size_t bitidx = (id % MI_REGION_MAP_BITS); + size = _mi_align_up(size, _mi_os_page_size()); size_t blocks = mi_region_block_count(size); size_t mask = mi_region_block_mask(blocks, bitidx); mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`? 
@@ -503,11 +545,11 @@ void _mi_mem_collect(mi_stats_t* stats) { m = mi_atomic_read_relaxed(®ion->map); } while(m == 0 && !mi_atomic_cas_weak(®ion->map, ~((uintptr_t)0), 0 )); if (m == 0) { - // on success, free the whole region (unless it was huge reserved) + // on success, free the whole region bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ion->info), NULL, &is_eager_committed); - if (start != NULL && !_mi_os_is_huge_reserved(start)) { - _mi_os_free_ex(start, MI_REGION_SIZE, is_eager_committed, stats); + if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { + _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, stats); } // and release mi_atomic_write(®ion->info,0); diff --git a/src/os.c b/src/os.c index 8f5afc5b..85cd1a83 100644 --- a/src/os.c +++ b/src/os.c @@ -869,13 +869,13 @@ static void mi_os_free_huge_reserved() { */ #if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) -int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { +int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { UNUSED(pages); UNUSED(max_secs); if (pages_reserved != NULL) *pages_reserved = 0; return ENOMEM; } #else -int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept +int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept { if (pages_reserved != NULL) *pages_reserved = 0; if (max_secs==0) return ETIMEDOUT; // timeout From aaf01620f4e878d48a4d2815bd0d894f28a5f093 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 19:39:49 -0700 Subject: [PATCH 037/293] improve allocation of the huge OS page arena --- src/arena.c | 39 ++++++++++++------- src/os.c | 110 +++++++++++++++++----------------------------------- 2 files changed, 60 insertions(+), 89 deletions(-) diff --git a/src/arena.c b/src/arena.c index 5f33965a..469755f2 
100644 --- a/src/arena.c +++ b/src/arena.c @@ -6,7 +6,16 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ /* ---------------------------------------------------------------------------- +"Arenas" are fixed area's of OS memory from which we can allocate +large blocks (>= MI_ARENA_BLOCK_SIZE, 16MiB). Currently only used to +allocate in one arena consisting of huge OS pages -- otherwise it +delegates to direct allocation from the OS. +In the future, we can expose an API to manually add more arenas which +is sometimes needed for embedded devices or shared memory for example. + +The arena allocation needs to be thread safe and we use a lock-free scan +with on-demand coalescing. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" @@ -16,8 +25,8 @@ terms of the MIT license. A copy of the license can be found in the file // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); -int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; +int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; +void _mi_os_free(void* p, size_t size, mi_stats_t* stats); /* ----------------------------------------------------------- Arena allocation @@ -338,25 +347,27 @@ static bool mi_arena_add(mi_arena_t* arena) { /* ----------------------------------------------------------- Reserve a huge page arena. - TODO: improve OS api to just reserve and claim a huge - page area at once, (and return the total size). 
----------------------------------------------------------- */ - -#include +#include // ENOMEM int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { size_t pages_reserved_default = 0; if (pages_reserved==NULL) pages_reserved = &pages_reserved_default; - int err = _mi_os_reserve_huge_os_pages(pages, max_secs, pages_reserved); - if (*pages_reserved==0) return err; - size_t hsize = (*pages_reserved) * GiB; - void* p = _mi_os_try_alloc_from_huge_reserved(hsize, MI_SEGMENT_ALIGN); - mi_assert_internal(p != NULL); - if (p == NULL) return ENOMEM; + size_t hsize = 0; + void* p = NULL; + int err = _mi_os_alloc_huge_os_pages(pages, max_secs, &p, pages_reserved, &hsize); + _mi_verbose_message("reserved %zu huge pages\n", *pages_reserved); + if (p==NULL) return err; + // err might be != 0 but that is fine, we just got less pages. + mi_assert_internal(*pages_reserved > 0 && hsize > 0 && *pages_reserved <= pages); size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much - mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_heap_default->tld->stats); - if (arena == NULL) return ENOMEM; + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); + if (arena == NULL) { + *pages_reserved = 0; + _mi_os_free(p, hsize, &_mi_stats_main); + return ENOMEM; + } arena->block_count = bcount; arena->start = (uint8_t*)p; arena->block_bottom = 0; diff --git a/src/os.c b/src/os.c index 85cd1a83..b7bffa64 100644 --- a/src/os.c +++ b/src/os.c @@ -36,8 +36,6 @@ terms of the MIT license. A copy of the license can be found in the file large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). 
----------------------------------------------------------- */ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -bool _mi_os_is_huge_reserved(void* p); -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); static void* mi_align_up_ptr(void* p, size_t alignment) { return (void*)_mi_align_up((uintptr_t)p, alignment); @@ -184,7 +182,7 @@ void _mi_os_init() { static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats) { - if (addr == NULL || size == 0 || _mi_os_is_huge_reserved(addr)) return true; + if (addr == NULL || size == 0) return true; // || _mi_os_is_huge_reserved(addr) bool err = false; #if defined(_WIN32) err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); @@ -628,7 +626,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ *is_zero = false; size_t csize; void* start = mi_os_page_align_areax(conservative, addr, size, &csize); - if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; + if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) int err = 0; if (commit) { _mi_stat_increase(&stats->committed, csize); @@ -684,7 +682,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // page align conservatively within the range size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); - if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; + if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) if (reset) _mi_stat_increase(&stats->reset, csize); else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! 
@@ -758,9 +756,11 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { size_t csize = 0; void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return false; + /* if (_mi_os_is_huge_reserved(addr)) { _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } + */ int err = 0; #ifdef _WIN32 DWORD oldprotect = 0; @@ -816,79 +816,42 @@ will be reused. -----------------------------------------------------------------------------*/ #define MI_HUGE_OS_PAGE_SIZE ((size_t)1 << 30) // 1GiB -typedef struct mi_huge_info_s { - volatile _Atomic(void*) start; // start of huge page area (32TiB) - volatile _Atomic(size_t) reserved; // total reserved size - volatile _Atomic(size_t) used; // currently allocated -} mi_huge_info_t; - -static mi_huge_info_t os_huge_reserved = { NULL, 0, ATOMIC_VAR_INIT(0) }; - -bool _mi_os_is_huge_reserved(void* p) { - return (mi_atomic_read_ptr(&os_huge_reserved.start) != NULL && - p >= mi_atomic_read_ptr(&os_huge_reserved.start) && - (uint8_t*)p < (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + mi_atomic_read(&os_huge_reserved.reserved)); -} - -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment) -{ - // only allow large aligned allocations (e.g. 
regions) - if (size < MI_SEGMENT_SIZE || (size % MI_SEGMENT_SIZE) != 0) return NULL; - if (try_alignment > MI_SEGMENT_SIZE) return NULL; - if (mi_atomic_read_ptr(&os_huge_reserved.start)==NULL) return NULL; - if (mi_atomic_read(&os_huge_reserved.used) >= mi_atomic_read(&os_huge_reserved.reserved)) return NULL; // already full - - // always aligned - mi_assert_internal(mi_atomic_read(&os_huge_reserved.used) % MI_SEGMENT_SIZE == 0 ); - mi_assert_internal( (uintptr_t)mi_atomic_read_ptr(&os_huge_reserved.start) % MI_SEGMENT_SIZE == 0 ); - - // try to reserve space - size_t base = mi_atomic_addu( &os_huge_reserved.used, size ); - if ((base + size) > os_huge_reserved.reserved) { - // "free" our over-allocation - mi_atomic_subu( &os_huge_reserved.used, size); - return NULL; - } - - // success! - uint8_t* p = (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + base; - mi_assert_internal( (uintptr_t)p % MI_SEGMENT_SIZE == 0 ); - return p; -} - -/* -static void mi_os_free_huge_reserved() { - uint8_t* addr = os_huge_reserved.start; - size_t total = os_huge_reserved.reserved; - os_huge_reserved.reserved = 0; - os_huge_reserved.start = NULL; - for( size_t current = 0; current < total; current += MI_HUGE_OS_PAGE_SIZE) { - _mi_os_free(addr + current, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); - } -} -*/ #if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) -int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { +int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** start, size_t* pages_reserved, size_t* size) mi_attr_noexcept { UNUSED(pages); UNUSED(max_secs); + if (start != NULL) *start = NULL; if (pages_reserved != NULL) *pages_reserved = 0; + if (size != NULL) *size = 0; return ENOMEM; } #else -int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept +static _Atomic(uintptr_t) huge_top; // = 0 + +int _mi_os_alloc_huge_os_pages(size_t pages, 
double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept { - if (pages_reserved != NULL) *pages_reserved = 0; + *pstart = NULL; + *pages_reserved = 0; + *psize = 0; if (max_secs==0) return ETIMEDOUT; // timeout if (pages==0) return 0; // ok - if (!mi_atomic_cas_ptr_strong(&os_huge_reserved.start,(void*)1,NULL)) return ETIMEDOUT; // already reserved - // Set the start address after the 32TiB area - uint8_t* start = (uint8_t*)((uintptr_t)32 << 40); // 32TiB virtual start address - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&mi_reserve_huge_os_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB - #endif + // Atomically claim a huge address range + size_t size = pages * MI_HUGE_OS_PAGE_SIZE; + uint8_t* start; + do { + start = (uint8_t*)mi_atomic_addu(&huge_top, size); + if (start == NULL) { + uintptr_t top = ((uintptr_t)32 << 40); // 32TiB virtual start address + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages); + top += ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + #endif + mi_atomic_cas_strong(&huge_top, top, 0); + } + } while (start == NULL); + // Allocate one page at the time but try to place them contiguously // We allocate one page at the time to be able to abort if it takes too long double start_t = _mi_clock_start(); @@ -925,16 +888,13 @@ int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_r } // success, record it if (page==0) { - mi_atomic_write_ptr(&os_huge_reserved.start, addr); // don't switch the order of these writes - mi_atomic_write(&os_huge_reserved.reserved, MI_HUGE_OS_PAGE_SIZE); + *pstart = addr; } - else { - 
mi_atomic_addu(&os_huge_reserved.reserved,MI_HUGE_OS_PAGE_SIZE); - } - _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); + *psize += MI_HUGE_OS_PAGE_SIZE; + *pages_reserved += 1; + _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); - if (pages_reserved != NULL) { *pages_reserved = page + 1; } - + // check for timeout double elapsed = _mi_clock_end(start_t); if (elapsed > max_secs) return ETIMEDOUT; @@ -943,7 +903,7 @@ int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_r if (estimate > 1.5*max_secs) return ETIMEDOUT; // seems like we are going to timeout } } - _mi_verbose_message("reserved %zu huge pages\n", pages); + mi_assert_internal(*psize == size); return 0; } #endif From a6499be074a52232ed131eeabb3bd8040f2743c3 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 19:53:07 -0700 Subject: [PATCH 038/293] initial numa support for arenas --- include/mimalloc-internal.h | 1 + include/mimalloc.h | 8 +- src/arena.c | 128 +++++++++++++----- src/init.c | 2 +- src/options.c | 3 +- src/os.c | 252 +++++++++++++++++++----------------- 6 files changed, 241 insertions(+), 153 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 2b881ac9..dd677a02 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -56,6 +56,7 @@ void _mi_os_init(void); // called fro void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); +int _mi_os_numa_node(void); // memory.c diff --git a/include/mimalloc.h b/include/mimalloc.h index b63ed79d..b155aca6 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -228,9 +228,14 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b // Experimental mi_decl_export bool 
mi_is_in_heap_region(const void* p) mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; mi_decl_export bool mi_is_redirected() mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept; + +// deprecated +mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; + // ------------------------------------------------------ // Convenience // ------------------------------------------------------ @@ -271,6 +276,7 @@ typedef enum mi_option_e { mi_option_eager_commit_delay, mi_option_segment_reset, mi_option_os_tag, + mi_option_max_numa_node, _mi_option_last } mi_option_t; diff --git a/src/arena.c b/src/arena.c index 469755f2..5bc3900c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -25,8 +25,10 @@ with on-demand coalescing. // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; +//int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize); +int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- Arena allocation @@ -44,6 +46,7 @@ typedef uintptr_t mi_block_info_t; typedef struct mi_arena_s { uint8_t* start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? 
bool is_large; // large OS page allocated _Atomic(uintptr_t) block_bottom; // optimization to start the search for free blocks @@ -223,7 +226,31 @@ static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_ze Arena Allocation ----------------------------------------------------------- */ -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { +static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, + bool* commit, bool* large, bool* is_zero, + size_t* memid) +{ + size_t block_index = SIZE_MAX; + void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index); + if (p != NULL) { + mi_assert_internal(block_index != SIZE_MAX); +#if MI_DEBUG>=1 + _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; + mi_block_info_t binfo = mi_atomic_read(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo) >= needed_bcount); +#endif + *memid = mi_memid_create(arena_index, block_index); + *commit = true; // TODO: support commit on demand? 
+ *large = arena->is_large; + } + return p; +} + +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, + bool* commit, bool* large, bool* is_zero, + size_t* memid, mi_os_tld_t* tld) +{ mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; @@ -240,33 +267,36 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* { size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); size_t bcount = asize / MI_ARENA_BLOCK_SIZE; + int numa_node = _mi_os_numa_node(); // current numa node mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + // try numa affine allocation for (size_t i = 0; i < MI_MAX_ARENAS; i++) { mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); - if (arena==NULL) break; - if (*large || !arena->is_large) { // large OS pages allowed, or arena is not large OS pages - size_t block_index = SIZE_MAX; - void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index); - if (p != NULL) { - mi_assert_internal(block_index != SIZE_MAX); - #if MI_DEBUG>=1 - _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; - mi_block_info_t binfo = mi_atomic_read(block); - mi_assert_internal(mi_block_is_in_use(binfo)); - mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); - #endif - *memid = mi_memid_create(i, block_index); - *commit = true; // TODO: support commit on demand? - *large = arena->is_large; - mi_assert_internal((uintptr_t)p % alignment == 0); - return p; - } + if (arena==NULL) break; // end reached + if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) return p; + } + } + // try from another numa node instead.. 
+ for (size_t i = 0; i < MI_MAX_ARENAS; i++) { + mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); + if (arena==NULL) break; // end reached + if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) return p; } } } - // fall back to the OS + // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); @@ -350,31 +380,61 @@ static bool mi_arena_add(mi_arena_t* arena) { ----------------------------------------------------------- */ #include // ENOMEM -int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { - size_t pages_reserved_default = 0; - if (pages_reserved==NULL) pages_reserved = &pages_reserved_default; +// reserve at a specific numa node +static int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { size_t hsize = 0; - void* p = NULL; - int err = _mi_os_alloc_huge_os_pages(pages, max_secs, &p, pages_reserved, &hsize); - _mi_verbose_message("reserved %zu huge pages\n", *pages_reserved); - if (p==NULL) return err; - // err might be != 0 but that is fine, we just got less pages. 
- mi_assert_internal(*pages_reserved > 0 && hsize > 0 && *pages_reserved <= pages); + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize); + if (p==NULL) return ENOMEM; + _mi_verbose_message("reserved %zu huge (1GiB) pages\n", pages); + size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; - size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much - mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); + size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { - *pages_reserved = 0; _mi_os_free(p, hsize, &_mi_stats_main); return ENOMEM; } arena->block_count = bcount; arena->start = (uint8_t*)p; arena->block_bottom = 0; + arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t)); - //mi_atomic_write(&arena->blocks[0], mi_block_info_create(bcount, false)); mi_arena_add(arena); return 0; } + + +// reserve huge pages evenly among all numa nodes. +int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { + if (pages == 0) return 0; + + // pages per numa node + int numa_count = _mi_os_numa_node_count(); + if (numa_count <= 0) numa_count = 1; + size_t pages_per = pages / numa_count; + if (pages_per == 0) pages_per = 1; + + // reserve evenly among numa nodes + for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + int err = mi_reserve_huge_os_pages_at((pages_per > pages ? 
pages : pages_per), numa_node); + if (err) return err; + if (pages < pages_per) { + pages = 0; + } + else { + pages -= pages_per; + } + } + + return 0; +} + +int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + _mi_verbose_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); + if (pages_reserved != NULL) *pages_reserved = 0; + int err = mi_reserve_huge_os_pages_interleave(pages); + if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; + return err; +} diff --git a/src/init.c b/src/init.c index e15d82eb..138b54aa 100644 --- a/src/init.c +++ b/src/init.c @@ -435,7 +435,7 @@ static void mi_process_load(void) { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) - mi_reserve_huge_os_pages(pages, max_secs, NULL); + mi_reserve_huge_os_pages_interleave(pages); } } diff --git a/src/options.c b/src/options.c index a49c46ed..32f13d54 100644 --- a/src/options.c +++ b/src/options.c @@ -66,7 +66,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose + { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose + { 256, UNINIT, MI_OPTION(max_numa_node) } // maximum allowed numa node }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index b7bffa64..c0564174 100644 --- a/src/os.c +++ b/src/os.c @@ -170,7 +170,7 @@ void _mi_os_init() { os_alloc_granularity = 
os_page_size; } if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = (1UL << 21); // 2MiB + large_os_page_size = 2*MiB; } } #endif @@ -207,31 +207,6 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size); #ifdef _WIN32 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { -#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - if ((size % ((uintptr_t)1 << 30)) == 0 /* 1GiB multiple */ - && (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0 - && (addr != NULL || try_alignment == 0 || try_alignment % _mi_os_page_size() == 0) - && pNtAllocateVirtualMemoryEx != NULL) - { - #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE - #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) - #endif - MEM_EXTENDED_PARAMETER param = { 0, 0 }; - param.Type = 5; // == MemExtendedParameterAttributeFlags; - param.ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - SIZE_T psize = size; - void* base = addr; - NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, ¶m, 1); - if (err == 0) { - return base; - } - else { - // else fall back to regular large OS pages - _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error 0x%lx)\n", err); - } - } -#endif #if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; @@ -364,7 +339,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro lflags |= MAP_HUGETLB; #endif #ifdef MAP_HUGE_1GB - if ((size % ((uintptr_t)1 << 30)) == 0) { + if ((size % GiB) == 0) { lflags |= MAP_HUGE_1GB; } else @@ -400,10 +375,10 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); #if 
defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead - // transparent huge pages (TPH). It is not required to call `madvise` with MADV_HUGE + // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE // though since properly aligned allocations will already use large pages if available // in that case -- in particular for our large regions (in `memory.c`). - // However, some systems only allow TPH if called with explicit `madvise`, so + // However, some systems only allow THP if called with explicit `madvise`, so // when large OS pages are enabled for mimalloc, we call `madvice` anyways. if (allow_large && use_large_os_page(size, try_alignment)) { if (madvise(p, size, MADV_HUGEPAGE) == 0) { @@ -810,101 +785,146 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- -Support for huge OS pages (1Gib) that are reserved up-front and never -released. Only regions are allocated in here (see `memory.c`) so the memory -will be reused. +Support for allocating huge OS pages (1Gib) that are reserved up-front +and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) -----------------------------------------------------------------------------*/ -#define MI_HUGE_OS_PAGE_SIZE ((size_t)1 << 30) // 1GiB +#define MI_HUGE_OS_PAGE_SIZE (GiB) +#if defined(WIN32) && (MI_INTPTR_SIZE >= 8) +static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) +{ + mi_assert_internal(size%GiB == 0); -#if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) -int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** start, size_t* pages_reserved, size_t* size) mi_attr_noexcept { - UNUSED(pages); UNUSED(max_secs); - if (start != NULL) *start = NULL; - if (pages_reserved != NULL) *pages_reserved = 0; - if (size != NULL) *size = 0; - return ENOMEM; -} -#else -static _Atomic(uintptr_t) huge_top; // = 0 - -int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept -{ - *pstart = NULL; - *pages_reserved = 0; - *psize = 0; - if (max_secs==0) return ETIMEDOUT; // timeout - if (pages==0) return 0; // ok - - // Atomically claim a huge address range - size_t size = pages * MI_HUGE_OS_PAGE_SIZE; - uint8_t* start; - do { - start = (uint8_t*)mi_atomic_addu(&huge_top, size); - if (start == NULL) { - uintptr_t top = ((uintptr_t)32 << 40); // 32TiB virtual start address - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages); - top += ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB - #endif - mi_atomic_cas_strong(&huge_top, top, 0); - } - } while (start == NULL); - + #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) + DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; + MEM_EXTENDED_PARAMETER params[4] = { {0,0},{0,0},{0,0},{0,0} }; + MEM_ADDRESS_REQUIREMENTS reqs = {0,0,0}; + reqs.HighestEndingAddress = NULL; + reqs.LowestStartingAddress = NULL; + reqs.Alignment = 
MI_SEGMENT_SIZE; - // Allocate one page at the time but try to place them contiguously - // We allocate one page at the time to be able to abort if it takes too long - double start_t = _mi_clock_start(); - uint8_t* addr = start; // current top of the allocations - for (size_t page = 0; page < pages; page++, addr += MI_HUGE_OS_PAGE_SIZE ) { - // allocate a page - void* p = NULL; - bool is_large = true; - #ifdef _WIN32 - if (page==0) { mi_win_enable_large_os_pages(); } - p = mi_win_virtual_alloc(addr, MI_HUGE_OS_PAGE_SIZE, 0, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE, true, true, &is_large); - #elif defined(MI_OS_USE_MMAP) - p = mi_unix_mmap(addr, MI_HUGE_OS_PAGE_SIZE, 0, PROT_READ | PROT_WRITE, true, true, &is_large); - #else - // always fail - #endif - - // Did we succeed at a contiguous address? - if (p != addr) { - // no success, issue a warning and return with an error - if (p != NULL) { - _mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr); - _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main ); - } - else { - #ifdef _WIN32 - int err = GetLastError(); - #else - int err = errno; - #endif - _mi_warning_message("could not allocate huge page %zu at 0x%p, error: %i\n", page, addr, err); - } - return ENOMEM; + // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages + if (pNtAllocateVirtualMemoryEx != NULL) { + #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE + #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) + #endif + params[0].Type = MemExtendedParameterAddressRequirements; + params[0].Pointer = &reqs; + params[1].Type = 5; // == MemExtendedParameterAttributeFlags; + params[1].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; + size_t param_count = 2; + if (numa_node >= 0) { + param_count++; + params[2].Type = MemExtendedParameterNumaNode; + params[2].ULong = (unsigned)numa_node; } - // success, record it - if (page==0) { - *pstart = addr; + SIZE_T psize = size; + void* base = NULL; + NTSTATUS err = 
(*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + if (err == 0) { + return base; } - *psize += MI_HUGE_OS_PAGE_SIZE; - *pages_reserved += 1; - _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); - _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); - - // check for timeout - double elapsed = _mi_clock_end(start_t); - if (elapsed > max_secs) return ETIMEDOUT; - if (page >= 1) { - double estimate = ((elapsed / (double)(page+1)) * (double)pages); - if (estimate > 1.5*max_secs) return ETIMEDOUT; // seems like we are going to timeout + else { + // fall back to regular huge pages + _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (error 0x%lx)\n", err); } } - mi_assert_internal(*psize == size); - return 0; + // on modern Windows try use VirtualAlloc2 for aligned large OS page allocation + if (pVirtualAlloc2 != NULL) { + params[0].Type = MemExtendedParameterAddressRequirements; + params[0].Pointer = &reqs; + size_t param_count = 1; + if (numa_node >= 0) { + param_count++; + params[1].Type = MemExtendedParameterNumaNode; + params[1].ULong = (unsigned)numa_node; + } + return (*pVirtualAlloc2)(GetCurrentProcess(), NULL, size, flags, PAGE_READWRITE, params, param_count); + } + #endif + return NULL; // give up on older Windows.. 
+} +#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) +#ifdef MI_HAS_NUMA +#include // mbind, and use -lnuma +#endif +static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { + mi_assert_internal(size%GiB == 0); + bool is_large = true; + void* p = mi_unix_mmap(NULL, MI_HUGE_OS_PAGE_SIZE, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + if (p == NULL) return NULL; + #ifdef MI_HAS_NUMA + if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { + uintptr_t numa_mask = (1UL << numa_node); + long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + if (err != 0) { + _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); + } + } + #endif + return p; +} +#else +static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { + return NULL; } #endif +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { + if (psize != NULL) *psize = 0; + size_t size = pages * MI_HUGE_OS_PAGE_SIZE; + void* p = mi_os_alloc_huge_os_pagesx(size, numa_node); + if (p==NULL) return NULL; + if (psize != NULL) *psize = size; + _mi_stat_increase(&_mi_stats_main.committed, size); + _mi_stat_increase(&_mi_stats_main.reserved, size); + return p; +} + +#ifdef WIN32 +static int mi_os_numa_nodex(void) { + PROCESSOR_NUMBER pnum; + USHORT numa_node = 0; + GetCurrentProcessorNumberEx(&pnum); + GetNumaProcessorNodeEx(&pnum,&numa_node); + return (int)numa_node; +} + +static int mi_os_numa_node_countx(void) { + ULONG numa_max = 0; + GetNumaHighestNodeNumber(&numa_max); + return (int)(numa_max + 1); +} +#elif MI_HAS_NUMA +#include +static int mi_os_numa_nodex(void) { + return numa_preferred(); +} +static int mi_os_numa_node_countx(void) { + return (numa_max_node() + 1); +} +#else +static int mi_os_numa_nodex(void) { + return 0; +} +static int mi_os_numa_node_countx(void) { + return 1; +} +#endif + +int _mi_os_numa_node_count(void) { + long ncount = mi_os_numa_node_countx(); + // 
never more than max numa node and at least 1 + long nmax = 1 + mi_option_get(mi_option_max_numa_node); + if (ncount > nmax) ncount = nmax; + if (ncount <= 0) ncount = 1; + return ncount; +} + +int _mi_os_numa_node(void) { + int nnode = mi_os_numa_nodex(); + // never more than the node count + int ncount = _mi_os_numa_node_count(); + if (nnode >= ncount) { nnode = nnode % ncount; } + return nnode; +} From 3fadf4abaf5ee91c38c6e593a1faabb28d9ab2f9 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 20:01:08 -0700 Subject: [PATCH 039/293] initial numa awareness for regions --- src/memory.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/memory.c b/src/memory.c index 9ab7c850..02e82e4d 100644 --- a/src/memory.c +++ b/src/memory.c @@ -45,10 +45,8 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); -bool _mi_os_is_huge_reserved(void* p); +//void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); +//void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); // arena.c void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); @@ -93,7 +91,8 @@ typedef struct mem_region_s { volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd - size_t arena_memid; + volatile 
_Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) + size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; @@ -212,6 +211,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit if (mi_atomic_cas_strong(®ion->info, info, 0)) { // update the region count region->arena_memid = arena_memid; + mi_atomic_write(®ion->numa_node, _mi_os_numa_node() + 1); mi_atomic_increment(®ions_count); } else { @@ -220,6 +220,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { regions[idx+i].arena_memid = arena_memid; + mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node() + 1); mi_atomic_increment(®ions_count); start = NULL; break; @@ -365,15 +366,18 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written // if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, - void** p, size_t* id, mi_os_tld_t* tld) +static bool mi_region_try_alloc_blocks(int numa_node, size_t idx, size_t blocks, size_t size, + bool* commit, bool* allow_large, bool* is_zero, + void** p, size_t* id, mi_os_tld_t* tld) { // check if there are available blocks in the region.. 
mi_assert_internal(idx < MI_REGION_MAX); mem_region_t* region = ®ions[idx]; uintptr_t m = mi_atomic_read_relaxed(®ion->map); - if (m != MI_REGION_MAP_FULL) { // some bits are zero + int rnode = ((int)mi_atomic_read_relaxed(®ion->numa_node)) - 1; + if ((rnode < 0 || rnode == numa_node) && // fits current numa node + (m != MI_REGION_MAP_FULL)) // and some bits are zero + { bool ok = (*commit || *allow_large); // committing or allow-large is always ok if (!ok) { // otherwise skip incompatible regions if possible. @@ -426,19 +430,20 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); // find a range of free blocks + int numa_node = _mi_os_numa_node(); void* p = NULL; size_t count = mi_atomic_read(®ions_count); size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention? for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around - if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error + if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error if (p != NULL) break; } if (p == NULL) { // no free range in existing regions -- try to extend beyond the count.. 
but at most 8 regions for (idx = count; idx < mi_atomic_read_relaxed(®ions_count) + 8 && idx < MI_REGION_MAX; idx++) { - if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error + if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error if (p != NULL) break; } } From 2d10c78587d6cf781ffb40c24cb727ecff625841 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 20:19:00 -0700 Subject: [PATCH 040/293] fix linux compilation --- CMakeLists.txt | 1 + src/arena.c | 3 ++- src/init.c | 2 +- src/os.c | 4 +++- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 81cc339a..e9eb6feb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ set(mi_install_dir "lib/mimalloc-${mi_version}") set(mi_sources src/stats.c src/os.c + src/arena.c src/memory.c src/segment.c src/page.c diff --git a/src/arena.c b/src/arena.c index 5bc3900c..bb1c1c10 100644 --- a/src/arena.c +++ b/src/arena.c @@ -381,7 +381,7 @@ static bool mi_arena_add(mi_arena_t* arena) { #include // ENOMEM // reserve at a specific numa node -static int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { +int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { size_t hsize = 0; void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize); if (p==NULL) return ENOMEM; @@ -432,6 +432,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { } int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + UNUSED(max_secs); _mi_verbose_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; int err = mi_reserve_huge_os_pages_interleave(pages); diff --git a/src/init.c b/src/init.c index 138b54aa..0813fddd 100644 --- a/src/init.c +++ b/src/init.c 
@@ -434,7 +434,7 @@ static void mi_process_load(void) { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) + // double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) mi_reserve_huge_os_pages_interleave(pages); } } diff --git a/src/os.c b/src/os.c index c0564174..2bb3ee3c 100644 --- a/src/os.c +++ b/src/os.c @@ -851,7 +851,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); bool is_large = true; - void* p = mi_unix_mmap(NULL, MI_HUGE_OS_PAGE_SIZE, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { @@ -861,6 +861,8 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } } + #else + UNUSED(numa_node); #endif return p; } From 57dd69265ad294e7cdfcc13ef7ecb69b7c5d61b1 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 20:30:01 -0700 Subject: [PATCH 041/293] normalize numa node --- src/arena.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arena.c b/src/arena.c index bb1c1c10..381d4486 100644 --- a/src/arena.c +++ b/src/arena.c @@ -383,6 +383,8 @@ static bool mi_arena_add(mi_arena_t* arena) { // reserve at a specific numa node int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { size_t hsize = 0; + if (numa_node < -1) numa_node = -1; + if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize); if (p==NULL) return ENOMEM; _mi_verbose_message("reserved 
%zu huge (1GiB) pages\n", pages); From 2c12d7f2234b25308478e22c9342a07623b6f891 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 22:01:52 -0700 Subject: [PATCH 042/293] optimized numa calls; better Linux support --- CMakeLists.txt | 12 ++++ include/mimalloc-internal.h | 2 +- include/mimalloc-types.h | 1 + src/arena.c | 2 +- src/init.c | 3 +- src/memory.c | 6 +- src/os.c | 114 ++++++++++++++++++++++++------------ 7 files changed, 97 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9eb6feb..1e96c237 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,8 @@ cmake_minimum_required(VERSION 3.0) project(libmimalloc C CXX) include("cmake/mimalloc-config-version.cmake") +include("CheckIncludeFile") + set(CMAKE_C_STANDARD 11) set(CMAKE_CXX_STANDARD 17) @@ -88,6 +90,16 @@ if(MI_USE_CXX MATCHES "ON") set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) endif() +CHECK_INCLUDE_FILE("numaif.h" MI_HAVE_NUMA_H) +if(MI_HAVE_NUMA_H) + list(APPEND mi_defines MI_HAS_NUMA) + list(APPEND mi_libraries numa) +else() + if (NOT(WIN32)) + message(WARNING "Compiling without using NUMA optimized allocation (on Linux, install libnuma-dev?)") + endif() +endif() + # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index dd677a02..b4d3351d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -56,7 +56,7 @@ void _mi_os_init(void); // called fro void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); -int _mi_os_numa_node(void); +int _mi_os_numa_node(mi_os_tld_t* tld); // memory.c diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 99b6b22b..0208d5c7 100644 --- 
a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -413,6 +413,7 @@ typedef struct mi_segments_tld_s { // OS thread local data typedef struct mi_os_tld_s { size_t region_idx; // start point for next allocation + int numa_node; // numa node associated with this thread mi_stats_t* stats; // points to tld stats } mi_os_tld_t; diff --git a/src/arena.c b/src/arena.c index 381d4486..7eb755c4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -267,7 +267,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, { size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); size_t bcount = asize / MI_ARENA_BLOCK_SIZE; - int numa_node = _mi_os_numa_node(); // current numa node + int numa_node = _mi_os_numa_node(tld); // current numa node mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); // try numa affine allocation diff --git a/src/init.c b/src/init.c index 0813fddd..166ca451 100644 --- a/src/init.c +++ b/src/init.c @@ -99,7 +99,7 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments - { 0, tld_main_stats }, // os + { 0, -1, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -218,6 +218,7 @@ static bool _mi_heap_init(void) { memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; tld->segments.stats = &tld->stats; + tld->os.numa_node = -1; tld->os.stats = &tld->stats; _mi_heap_default = heap; } diff --git a/src/memory.c b/src/memory.c index 02e82e4d..a425393c 100644 --- a/src/memory.c +++ b/src/memory.c @@ -211,7 +211,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit if (mi_atomic_cas_strong(&region->info, info, 0)) { // update the region count region->arena_memid = arena_memid; - mi_atomic_write(&region->numa_node, _mi_os_numa_node() + 1); + mi_atomic_write(&region->numa_node, _mi_os_numa_node(tld) + 1); mi_atomic_increment(&regions_count); } else { @@ -220,7 +220,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, 
size_t bit for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) { regions[idx+i].arena_memid = arena_memid; - mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node() + 1); + mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); mi_atomic_increment(&regions_count); start = NULL; break; @@ -430,7 +430,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); // find a range of free blocks - int numa_node = _mi_os_numa_node(); + int numa_node = _mi_os_numa_node(tld); void* p = NULL; size_t count = mi_atomic_read(&regions_count); size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention? diff --git a/src/os.c b/src/os.c index 2bb3ee3c..677d0ea2 100644 --- a/src/os.c +++ b/src/os.c @@ -97,7 +97,7 @@ typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T* static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; -static bool mi_win_enable_large_os_pages() +static bool mi_win_enable_large_os_pages() { if (large_os_page_size > 0) return true; @@ -148,10 +148,10 @@ void _mi_os_init(void) { FreeLibrary(hDll); } hDll = LoadLibrary(TEXT("ntdll.dll")); - if (hDll != NULL) { + if (hDll != NULL) { pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); FreeLibrary(hDll); - } + } if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { mi_win_enable_large_os_pages(); } @@ -191,7 +191,7 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats #else err = (munmap(addr, size) == -1); #endif - if (was_committed) _mi_stat_decrease(&stats->committed, size); + if (was_committed) _mi_stat_decrease(&stats->committed, size); 
_mi_stat_decrease(&stats->reserved, size); if (err) { #pragma warning(suppress:4996) @@ -207,14 +207,14 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size); #ifdef _WIN32 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { -#if (MI_INTPTR_SIZE >= 8) +#if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) { return VirtualAlloc(hint, size, flags, PAGE_READWRITE); } #endif -#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) +#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) // on modern Windows try use VirtualAlloc2 for aligned allocation if (try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { MEM_ADDRESS_REQUIREMENTS reqs = { 0 }; @@ -232,7 +232,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, mi_assert_internal(!(large_only && !allow_large)); static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; - if ((large_only || use_large_os_page(size, try_alignment)) + if ((large_only || use_large_os_page(size, try_alignment)) && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { @@ -372,7 +372,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro } if (p == NULL) { *is_large = false; - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); + p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); #if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead // transparent huge pages (THP). 
It is not required to call `madvise` with MADV_HUGE @@ -391,7 +391,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro } #endif -// On 64-bit systems, we can do efficient aligned allocation by using +// On 64-bit systems, we can do efficient aligned allocation by using // the 4TiB to 30TiB area to allocate them. #if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED))) static volatile _Atomic(intptr_t) aligned_base; @@ -785,14 +785,14 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- -Support for allocating huge OS pages (1Gib) that are reserved up-front +Support for allocating huge OS pages (1Gib) that are reserved up-front and possibly associated with a specific NUMA node. (use `numa_node>=0`) -----------------------------------------------------------------------------*/ -#define MI_HUGE_OS_PAGE_SIZE (GiB) +#define MI_HUGE_OS_PAGE_SIZE (GiB) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) -static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) -{ +static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) +{ mi_assert_internal(size%GiB == 0); #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) @@ -802,8 +802,8 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) reqs.HighestEndingAddress = NULL; reqs.LowestStartingAddress = NULL; reqs.Alignment = MI_SEGMENT_SIZE; - - // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages + + // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages if (pNtAllocateVirtualMemoryEx != NULL) { #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) @@ -825,10 +825,10 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) return base; } else { - // fall back to regular huge pages + // fall back to regular huge pages 
_mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (error 0x%lx)\n", err); } - } + } // on modern Windows try use VirtualAlloc2 for aligned large OS page allocation if (pVirtualAlloc2 != NULL) { params[0].Type = MemExtendedParameterAddressRequirements; @@ -842,7 +842,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) return (*pVirtualAlloc2)(GetCurrentProcess(), NULL, size, flags, PAGE_READWRITE, params, param_count); } #endif - return NULL; // give up on older Windows.. + return NULL; // give up on older Windows.. } #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) #ifdef MI_HAS_NUMA @@ -853,7 +853,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { bool is_large = true; void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - #ifdef MI_HAS_NUMA + #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { uintptr_t numa_mask = (1UL << numa_node); long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); @@ -866,7 +866,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { #endif return p; } -#else +#else static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { return NULL; } @@ -884,12 +884,12 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { } #ifdef WIN32 -static int mi_os_numa_nodex(void) { +static int mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); GetNumaProcessorNodeEx(&pnum,&numa_node); - return (int)numa_node; + return (int)numa_node; } static int mi_os_numa_node_countx(void) { @@ -898,12 +898,42 @@ static int mi_os_numa_node_countx(void) { return (int)(numa_max + 1); } #elif MI_HAS_NUMA -#include +#include +#include +#include static int mi_os_numa_nodex(void) { - return numa_preferred(); + #define MI_MAX_MASK (4) // support at most 256 
nodes + unsigned long mask[MI_MAX_MASK]; + memset(mask,0,MI_MAX_MASK*sizeof(long)); + int mode = 0; + long err = get_mempolicy(&mode, mask, MI_MAX_MASK*sizeof(long)*8, NULL, 0 /* thread policy */); + if (err != 0) return 0; + // find the lowest bit that is set + for(int i = 0; i < MI_MAX_MASK; i++) { + for(int j = 0; j < (int)(sizeof(long)*8); j++) { + if ((mask[i] & (1UL << j)) != 0) { + return (i*sizeof(long)*8 + j); + } + } + } + return 0; } + static int mi_os_numa_node_countx(void) { - return (numa_max_node() + 1); + DIR* d = opendir("/sys/devices/system/node"); + if (d==NULL) return 1; + + struct dirent* de; + int max_node_num = 0; + while ((de = readdir(d)) != NULL) { + int node_num; + if (strncmp(de->d_name, "node", 4) == 0) { + node_num = (int)strtol(de->d_name+4, NULL, 0); + if (max_node_num < node_num) max_node_num = node_num; + } + } + closedir(d); + return (max_node_num + 1); } #else static int mi_os_numa_nodex(void) { @@ -915,18 +945,28 @@ static int mi_os_numa_node_countx(void) { #endif int _mi_os_numa_node_count(void) { - long ncount = mi_os_numa_node_countx(); - // never more than max numa node and at least 1 - long nmax = 1 + mi_option_get(mi_option_max_numa_node); - if (ncount > nmax) ncount = nmax; - if (ncount <= 0) ncount = 1; - return ncount; + static int numa_node_count = 0; + if (mi_unlikely(numa_node_count <= 0)) { + int ncount = mi_os_numa_node_countx(); + // never more than max numa node and at least 1 + int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); + if (ncount > nmax) ncount = nmax; + if (ncount <= 0) ncount = 1; + numa_node_count = ncount; + } + mi_assert_internal(numa_node_count >= 1); + return numa_node_count; } -int _mi_os_numa_node(void) { - int nnode = mi_os_numa_nodex(); - // never more than the node count - int ncount = _mi_os_numa_node_count(); - if (nnode >= ncount) { nnode = nnode % ncount; } - return nnode; +int _mi_os_numa_node(mi_os_tld_t* tld) { + if (mi_unlikely(tld->numa_node < 0)) { + int nnode = 
mi_os_numa_nodex(); + // never more than the node count + int ncount = _mi_os_numa_node_count(); + if (nnode >= ncount) { nnode = nnode % ncount; } + if (nnode < 0) nnode = 0; + tld->numa_node = nnode; + } + mi_assert_internal(tld->numa_node >= 0 && tld->numa_node < _mi_os_numa_node_count()); + return tld->numa_node; } From a69016c33e5969b07426669b58e6a927c478c308 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 10:30:16 -0700 Subject: [PATCH 043/293] improve and document numa support --- src/os.c | 39 +++++++++++++++++++++++++++++---------- test/main-override.cpp | 2 +- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/src/os.c b/src/os.c index 677d0ea2..fc89d642 100644 --- a/src/os.c +++ b/src/os.c @@ -854,8 +854,11 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; #ifdef MI_HAS_NUMA - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { + if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); + // TODO: does `mbind` work correctly for huge OS pages? should we + // use `set_mempolicy` before calling mmap instead? 
+ // see: long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); @@ -883,6 +886,9 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { return p; } +/* ---------------------------------------------------------------------------- +Support NUMA aware allocation +-----------------------------------------------------------------------------*/ #ifdef WIN32 static int mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; @@ -902,6 +908,9 @@ static int mi_os_numa_node_countx(void) { #include #include static int mi_os_numa_nodex(void) { + #define MI_NUMA_NODE_SLOW // too slow, so cache it + // TODO: perhaps use RDTSCP instruction on x64? + // see #define MI_MAX_MASK (4) // support at most 256 nodes unsigned long mask[MI_MAX_MASK]; memset(mask,0,MI_MAX_MASK*sizeof(long)); @@ -945,7 +954,7 @@ static int mi_os_numa_node_countx(void) { #endif int _mi_os_numa_node_count(void) { - static int numa_node_count = 0; + static int numa_node_count = 0; // cache the node count if (mi_unlikely(numa_node_count <= 0)) { int ncount = mi_os_numa_node_countx(); // never more than max numa node and at least 1 @@ -959,14 +968,24 @@ int _mi_os_numa_node_count(void) { } int _mi_os_numa_node(mi_os_tld_t* tld) { + int numa_node; +#ifndef MI_NUMA_NODE_SLOW + UNUSED(tld); + numa_node = mi_os_numa_nodex(); +#else if (mi_unlikely(tld->numa_node < 0)) { - int nnode = mi_os_numa_nodex(); - // never more than the node count - int ncount = _mi_os_numa_node_count(); - if (nnode >= ncount) { nnode = nnode % ncount; } - if (nnode < 0) nnode = 0; - tld->numa_node = nnode; + // Cache the NUMA node of the thread if the call is slow. 
+ // This may not be correct as threads can migrate to another cpu on + // another node -- however, for memory allocation this just means we keep + // using the same 'node id' for its allocations; new OS allocations + // naturally come from the actual node so in practice this may be fine. + tld->numa_node = mi_os_numa_nodex(); } - mi_assert_internal(tld->numa_node >= 0 && tld->numa_node < _mi_os_numa_node_count()); - return tld->numa_node; + numa_node = tld->numa_node +#endif + // never more than the node count and >= 0 + int numa_count = _mi_os_numa_node_count(); + if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } + if (numa_node < 0) numa_node = 0; + return numa_node; } diff --git a/test/main-override.cpp b/test/main-override.cpp index e006ad27..f7a7f1bd 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -24,7 +24,7 @@ public: int main() { - //mi_stats_reset(); // ignore earlier allocations + mi_stats_reset(); // ignore earlier allocations atexit(free_p); void* p1 = malloc(78); void* p2 = mi_malloc_aligned(16,24); From 70748ee1ee1da3e9ad14c2d751623e47cb3fd287 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 10:39:09 -0700 Subject: [PATCH 044/293] fix missing semi colon --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index fc89d642..c41d028f 100644 --- a/src/os.c +++ b/src/os.c @@ -981,7 +981,7 @@ int _mi_os_numa_node(mi_os_tld_t* tld) { // naturally come from the actual node so in practice this may be fine. 
tld->numa_node = mi_os_numa_nodex(); } - numa_node = tld->numa_node + numa_node = tld->numa_node; #endif // never more than the node count and >= 0 int numa_count = _mi_os_numa_node_count(); From fd9d8c85ae40db95feb51da6e5478850bc6722fc Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 11:55:03 -0700 Subject: [PATCH 045/293] change numa support on linux to use getcpu --- include/mimalloc-types.h | 1 - src/init.c | 5 ++--- src/os.c | 45 +++++++++++----------------------------- 3 files changed, 14 insertions(+), 37 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 0208d5c7..99b6b22b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -413,7 +413,6 @@ typedef struct mi_segments_tld_s { // OS thread local data typedef struct mi_os_tld_s { size_t region_idx; // start point for next allocation - int numa_node; // numa node associated with this thread mi_stats_t* stats; // points to tld stats } mi_os_tld_t; diff --git a/src/init.c b/src/init.c index 166ca451..ed15aeba 100644 --- a/src/init.c +++ b/src/init.c @@ -99,8 +99,8 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments - { 0, -1, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { 0, tld_main_stats }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { @@ -218,7 +218,6 @@ static bool _mi_heap_init(void) { memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; tld->segments.stats = &tld->stats; - tld->os.numa_node = -1; tld->os.stats = &tld->stats; _mi_heap_default = heap; } diff --git a/src/os.c b/src/os.c index c41d028f..8e1b3e91 100644 --- a/src/os.c +++ b/src/os.c @@ -903,29 +903,21 @@ static int mi_os_numa_node_countx(void) { GetNumaHighestNodeNumber(&numa_max); return (int)(numa_max + 1); } -#elif MI_HAS_NUMA +#elif defined(__linux__) #include #include -#include +#include + static int mi_os_numa_nodex(void) { - #define MI_NUMA_NODE_SLOW // 
too slow, so cache it - // TODO: perhaps use RDTSCP instruction on x64? - // see - #define MI_MAX_MASK (4) // support at most 256 nodes - unsigned long mask[MI_MAX_MASK]; - memset(mask,0,MI_MAX_MASK*sizeof(long)); - int mode = 0; - long err = get_mempolicy(&mode, mask, MI_MAX_MASK*sizeof(long)*8, NULL, 0 /* thread policy */); +#ifdef SYS_getcpu + unsigned node = 0; + unsigned ncpu = 0; + int err = syscall(SYS_getcpu, &ncpu, &node, NULL); if (err != 0) return 0; - // find the lowest bit that is set - for(int i = 0; i < MI_MAX_MASK; i++) { - for(int j = 0; j < (int)(sizeof(long)*8); j++) { - if ((mask[i] & (1UL << j)) != 0) { - return (i*sizeof(long)*8 + j); - } - } - } - return 0; + return (int)node; +#else + return 0; +#endif } static int mi_os_numa_node_countx(void) { @@ -968,21 +960,8 @@ int _mi_os_numa_node_count(void) { } int _mi_os_numa_node(mi_os_tld_t* tld) { - int numa_node; -#ifndef MI_NUMA_NODE_SLOW UNUSED(tld); - numa_node = mi_os_numa_nodex(); -#else - if (mi_unlikely(tld->numa_node < 0)) { - // Cache the NUMA node of the thread if the call is slow. - // This may not be correct as threads can migrate to another cpu on - // another node -- however, for memory allocation this just means we keep - // using the same 'node id' for its allocations; new OS allocations - // naturally come from the actual node so in practice this may be fine. 
- tld->numa_node = mi_os_numa_nodex(); - } - numa_node = tld->numa_node; -#endif + int numa_node = mi_os_numa_nodex(); // never more than the node count and >= 0 int numa_count = _mi_os_numa_node_count(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } From ee323aabac42ab4333e40cedd02f0eb1d4356b4e Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 15:56:21 -0700 Subject: [PATCH 046/293] fix vs2017 build --- ide/vs2017/mimalloc-override.vcxproj | 1 + ide/vs2017/mimalloc-override.vcxproj.filters | 3 +++ ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 +++ src/os.c | 5 +++-- 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 511c0fab..1fc70b33 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -231,6 +231,7 @@ + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 6ac0c0b5..75a8e032 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -70,5 +70,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 6147c349..484c4db8 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -217,6 +217,7 @@ + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index a2b64314..598b8643 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -53,6 +53,9 @@ Source Files + + Source Files + diff --git a/src/os.c b/src/os.c index 8e1b3e91..4aa4abf3 100644 --- a/src/os.c +++ b/src/os.c @@ -794,6 +794,7 @@ and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); + mi_win_enable_large_os_pages(); #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; @@ -812,7 +813,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) params[0].Pointer = &reqs; params[1].Type = 5; // == MemExtendedParameterAttributeFlags; params[1].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - size_t param_count = 2; + ULONG param_count = 2; if (numa_node >= 0) { param_count++; params[2].Type = MemExtendedParameterNumaNode; @@ -833,7 +834,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) if (pVirtualAlloc2 != NULL) { params[0].Type = MemExtendedParameterAddressRequirements; params[0].Pointer = &reqs; - size_t param_count = 1; + ULONG param_count = 1; if (numa_node >= 0) { param_count++; params[1].Type = MemExtendedParameterNumaNode; From 62cd0237fc8583f357fe4599889011f845690af1 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 17:49:34 -0700 Subject: [PATCH 047/293] fix aligned huge page allocation on windows --- src/arena.c | 2 +- src/os.c | 118 +++++++++++++++++++++++++++++++++------------------- 2 files changed, 76 insertions(+), 44 deletions(-) diff --git a/src/arena.c b/src/arena.c index 7eb755c4..56b09859 100644 --- a/src/arena.c +++ b/src/arena.c @@ -435,7 +435,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { UNUSED(max_secs); - _mi_verbose_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); + _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; int err = mi_reserve_huge_os_pages_interleave(pages); if (err==0 && pages_reserved!=NULL) 
*pages_reserved = pages; diff --git a/src/os.c b/src/os.c index 4aa4abf3..e1dc31f8 100644 --- a/src/os.c +++ b/src/os.c @@ -791,68 +791,68 @@ and possibly associated with a specific NUMA node. (use `numa_node>=0`) #define MI_HUGE_OS_PAGE_SIZE (GiB) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) -static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) +static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); + mi_assert_internal(addr != NULL); + const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; + mi_win_enable_large_os_pages(); - + + void* p = NULL; #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; - MEM_EXTENDED_PARAMETER params[4] = { {0,0},{0,0},{0,0},{0,0} }; - MEM_ADDRESS_REQUIREMENTS reqs = {0,0,0}; - reqs.HighestEndingAddress = NULL; - reqs.LowestStartingAddress = NULL; - reqs.Alignment = MI_SEGMENT_SIZE; - + MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages if (pNtAllocateVirtualMemoryEx != NULL) { #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) #endif - params[0].Type = MemExtendedParameterAddressRequirements; - params[0].Pointer = &reqs; - params[1].Type = 5; // == MemExtendedParameterAttributeFlags; - params[1].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - ULONG param_count = 2; - if (numa_node >= 0) { - param_count++; - params[2].Type = MemExtendedParameterNumaNode; - params[2].ULong = (unsigned)numa_node; - } - SIZE_T psize = size; - void* base = NULL; - NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); - if (err == 0) { - return base; - } - else { - // fall back to regular huge pages - _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (error 0x%lx)\n", err); - 
} - } - // on modern Windows try use VirtualAlloc2 for aligned large OS page allocation - if (pVirtualAlloc2 != NULL) { - params[0].Type = MemExtendedParameterAddressRequirements; - params[0].Pointer = &reqs; + params[0].Type = 5; // == MemExtendedParameterAttributeFlags; + params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; ULONG param_count = 1; if (numa_node >= 0) { param_count++; params[1].Type = MemExtendedParameterNumaNode; params[1].ULong = (unsigned)numa_node; } - return (*pVirtualAlloc2)(GetCurrentProcess(), NULL, size, flags, PAGE_READWRITE, params, param_count); + SIZE_T psize = size; + void* base = addr; + NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + if (err == 0 && base != NULL) { + return base; + } + else { + // fall back to regular huge pages + _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); + } } + // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation + if (pVirtualAlloc2 != NULL && numa_node >= 0) { + params[0].Type = MemExtendedParameterNumaNode; + params[0].ULong = (unsigned)numa_node; + p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); + } + else #endif - return NULL; // give up on older Windows.. 
+ // use regular virtual alloc on older windows + { + p = VirtualAlloc(addr, size, flags, PAGE_READWRITE); + } + + if (p == NULL) { + _mi_warning_message("failed to allocate huge OS pages (size %zu) (error %d)\n", size, GetLastError()); + } + return p; } + #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) #ifdef MI_HAS_NUMA #include // mbind, and use -lnuma #endif -static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { +static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); bool is_large = true; - void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes @@ -871,19 +871,51 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { return p; } #else -static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { +static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { return NULL; } #endif +// To ensure proper alignment, use our own area for huge OS pages +static _Atomic(uintptr_t) mi_huge_start; // = 0 + +// Allocate MI_SEGMENT_SIZE aligned huge pages void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { if (psize != NULL) *psize = 0; - size_t size = pages * MI_HUGE_OS_PAGE_SIZE; - void* p = mi_os_alloc_huge_os_pagesx(size, numa_node); - if (p==NULL) return NULL; - if (psize != NULL) *psize = size; + const size_t size = pages * MI_HUGE_OS_PAGE_SIZE; + + // Find a new aligned address for the huge pages + uintptr_t start = 0; + uintptr_t end = 0; + uintptr_t expected; + do { + start = expected = mi_atomic_read_relaxed(&mi_huge_start); + if (start == 0) { + // Initialize the start address after the 32TiB area + start = ((uintptr_t)32 << 40); // 32TiB 
virtual start address + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages); + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + #endif + } + end = start + size; + mi_assert_internal(end % MI_SEGMENT_SIZE == 0); + } while (!mi_atomic_cas_strong(&mi_huge_start, end, expected)); + + // And allocate + void* p = mi_os_alloc_huge_os_pagesx((void*)start, size, numa_node); + if (p == NULL) { + return NULL; + } _mi_stat_increase(&_mi_stats_main.committed, size); _mi_stat_increase(&_mi_stats_main.reserved, size); + if ((uintptr_t)p % MI_SEGMENT_SIZE != 0) { // must be aligned + _mi_warning_message("huge page area was not aligned\n"); + _mi_os_free(p,size,&_mi_stats_main); + return NULL; + } + + if (psize != NULL) *psize = size; return p; } From 723fbba2596e663b6dac40da5e486c0ac52501f3 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 12:18:20 -0800 Subject: [PATCH 048/293] fix output during preloading enabling stderr only after the crt has loaded --- src/options.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/src/options.c b/src/options.c index 32f13d54..3a7833a2 100644 --- a/src/options.c +++ b/src/options.c @@ -14,6 +14,8 @@ terms of the MIT license. A copy of the license can be found in the file #include // toupper #include +static void mi_add_stderr_output(); + int mi_version(void) mi_attr_noexcept { return MI_MALLOC_VERSION; } @@ -73,7 +75,9 @@ static mi_option_desc_t options[_mi_option_last] = static void mi_option_init(mi_option_desc_t* desc); void _mi_options_init(void) { - // called on process load + // called on process load; should not be called before the CRT is initialized! + // (e.g. 
do not call this from process_init as that may run before CRT initialization) + mi_add_stderr_output(); // now it safe to use stderr for output for(int i = 0; i < _mi_option_last; i++ ) { mi_option_t option = (mi_option_t)i; mi_option_get(option); // initialize @@ -135,7 +139,7 @@ static void mi_out_stderr(const char* msg) { #ifdef _WIN32 // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. - _cputs(msg); + if (!_mi_preloading()) { _cputs(msg); } #else fputs(msg, stderr); #endif @@ -166,23 +170,29 @@ static void mi_out_buf(const char* msg) { memcpy(&out_buf[start], msg, n); } -static void mi_out_buf_flush(mi_output_fun* out) { +static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { if (out==NULL) return; - // claim all (no more output will be added after this point) - size_t count = mi_atomic_addu(&out_len, MI_MAX_DELAY_OUTPUT); + // claim (if `no_more_buf == true`, no more output will be added after this point) + size_t count = mi_atomic_addu(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); // and output the current contents if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; out(out_buf); + if (!no_more_buf) { + out_buf[count] = '\n'; // if continue with the buffer, insert a newline + } } -// The initial default output, outputs to stderr and the delayed output buffer. + +// Once this module is loaded, switch to this routine +// which outputs to stderr and the delayed output buffer. static void mi_out_buf_stderr(const char* msg) { mi_out_stderr(msg); mi_out_buf(msg); } + // -------------------------------------------------------- // Default output handler // -------------------------------------------------------- @@ -194,14 +204,19 @@ static mi_output_fun* volatile mi_out_default; // = NULL static mi_output_fun* mi_out_get_default(void) { mi_output_fun* out = mi_out_default; - return (out == NULL ? 
&mi_out_buf_stderr : out); + return (out == NULL ? &mi_out_buf : out); } void mi_register_output(mi_output_fun* out) mi_attr_noexcept { mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer - if (out!=NULL) mi_out_buf_flush(out); // output the delayed output now + if (out!=NULL) mi_out_buf_flush(out,true); // output all the delayed output now } +// add stderr to the delayed output after the module is loaded +static void mi_add_stderr_output() { + mi_out_buf_flush(&mi_out_stderr, false); // flush current contents to stderr + mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output +} // -------------------------------------------------------- // Messages, all end up calling `_mi_fputs`. @@ -214,7 +229,7 @@ static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT static mi_decl_thread bool recurse = false; void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { - if (_mi_preloading() || recurse) return; + if (recurse) return; if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(); recurse = true; if (prefix != NULL) out(prefix); @@ -228,7 +243,7 @@ void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { static void mi_vfprintf( mi_output_fun* out, const char* prefix, const char* fmt, va_list args ) { char buf[512]; if (fmt==NULL) return; - if (_mi_preloading() || recurse) return; + if (recurse) return; recurse = true; vsnprintf(buf,sizeof(buf)-1,fmt,args); recurse = false; From e32048879183c2672db7d06138ca6f4eb80ebfa1 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 12:18:32 -0800 Subject: [PATCH 049/293] add numa nodes to stats --- include/mimalloc-internal.h | 2 +- src/os.c | 7 +++++-- src/stats.c | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index b4d3351d..c28cf0fd 100644 --- a/include/mimalloc-internal.h +++ 
b/include/mimalloc-internal.h @@ -57,7 +57,7 @@ void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocat void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); int _mi_os_numa_node(mi_os_tld_t* tld); - +int _mi_os_numa_node_count(void); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); diff --git a/src/os.c b/src/os.c index e1dc31f8..af3c440c 100644 --- a/src/os.c +++ b/src/os.c @@ -840,7 +840,8 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } if (p == NULL) { - _mi_warning_message("failed to allocate huge OS pages (size %zu) (error %d)\n", size, GetLastError()); + DWORD winerr = GetLastError(); + _mi_warning_message("failed to allocate huge OS pages (size %zu) (windows error %d%s)\n", size, winerr, (winerr==1450 ? " (insufficient resources)" : "")); } return p; } @@ -981,12 +982,14 @@ static int mi_os_numa_node_countx(void) { int _mi_os_numa_node_count(void) { static int numa_node_count = 0; // cache the node count if (mi_unlikely(numa_node_count <= 0)) { - int ncount = mi_os_numa_node_countx(); + int ncount = mi_os_numa_node_countx(); + int ncount0 = ncount; // never more than max numa node and at least 1 int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); if (ncount > nmax) ncount = nmax; if (ncount <= 0) ncount = 1; numa_node_count = ncount; + _mi_verbose_message("using %i numa regions (%i nodes detected)\n", numa_node_count, ncount0); } mi_assert_internal(numa_node_count >= 1); return numa_node_count; diff --git a/src/stats.c b/src/stats.c index 50bd029d..79362cc4 100644 --- a/src/stats.c +++ b/src/stats.c @@ -265,7 +265,7 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_stat_counter_print(&stats->commit_calls, "commits", out); mi_stat_print(&stats->threads, "threads", -1, out); 
mi_stat_counter_print_avg(&stats->searches, "searches", out); - + _mi_fprintf(out, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); if (secs >= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs); double user_time; From f36ec5d9d8275777e05526468524dfd9d433164e Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:16:07 -0800 Subject: [PATCH 050/293] reserve huge pages incrementally --- src/arena.c | 23 ++++++---- src/options.c | 1 - src/os.c | 120 ++++++++++++++++++++++++++++++++++---------------- 3 files changed, 96 insertions(+), 48 deletions(-) diff --git a/src/arena.c b/src/arena.c index 56b09859..24fd2114 100644 --- a/src/arena.c +++ b/src/arena.c @@ -27,7 +27,10 @@ with on-demand coalescing. void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); //int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize); + +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize); +void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); + int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- @@ -234,12 +237,12 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index); if (p != NULL) { mi_assert_internal(block_index != SIZE_MAX); -#if MI_DEBUG>=1 + #if MI_DEBUG>=1 _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; mi_block_info_t binfo = mi_atomic_read(block); mi_assert_internal(mi_block_is_in_use(binfo)); mi_assert_internal(mi_block_count(binfo) >= needed_bcount); -#endif + #endif *memid = mi_memid_create(arena_index, block_index); *commit = true; // TODO: support commit on 
demand? *large = arena->is_large; @@ -382,18 +385,22 @@ static bool mi_arena_add(mi_arena_t* arena) { // reserve at a specific numa node int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { - size_t hsize = 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize); - if (p==NULL) return ENOMEM; - _mi_verbose_message("reserved %zu huge (1GiB) pages\n", pages); + size_t hsize = 0; + size_t pages_reserved = 0; + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, (double)pages / 2.0, &pages_reserved, &hsize); + if (p==NULL || pages_reserved==0) { + _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); + return ENOMEM; + } + _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { - _mi_os_free(p, hsize, &_mi_stats_main); + _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); return ENOMEM; } arena->block_count = bcount; diff --git a/src/options.c b/src/options.c index 3a7833a2..11d12187 100644 --- a/src/options.c +++ b/src/options.c @@ -221,7 +221,6 @@ static void mi_add_stderr_output() { // -------------------------------------------------------- // Messages, all end up calling `_mi_fputs`. 
// -------------------------------------------------------- -#define MAX_ERROR_COUNT (10) static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings // When overriding malloc, we may recurse into mi_vfprintf if an allocation diff --git a/src/os.c b/src/os.c index af3c440c..5947333d 100644 --- a/src/os.c +++ b/src/os.c @@ -339,7 +339,8 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro lflags |= MAP_HUGETLB; #endif #ifdef MAP_HUGE_1GB - if ((size % GiB) == 0) { + static bool mi_huge_pages_available = true; + if ((size % GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else @@ -358,6 +359,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { + mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); @@ -799,11 +801,11 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) mi_win_enable_large_os_pages(); - void* p = NULL; #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - if (pNtAllocateVirtualMemoryEx != NULL) { + static bool mi_huge_pages_available = true; + if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) #endif @@ -822,7 +824,8 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) return base; } else { - // fall back to regular huge pages + // 
fall back to regular large pages + mi_huge_pages_available = false; // don't try further huge pages _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); } } @@ -830,20 +833,11 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) if (pVirtualAlloc2 != NULL && numa_node >= 0) { params[0].Type = MemExtendedParameterNumaNode; params[0].ULong = (unsigned)numa_node; - p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); + return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } - else #endif - // use regular virtual alloc on older windows - { - p = VirtualAlloc(addr, size, flags, PAGE_READWRITE); - } - - if (p == NULL) { - DWORD winerr = GetLastError(); - _mi_warning_message("failed to allocate huge OS pages (size %zu) (windows error %d%s)\n", size, winerr, (winerr==1450 ? " (insufficient resources)" : "")); - } - return p; + // otherwise use regular virtual alloc on older windows + return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) @@ -880,44 +874,92 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) // To ensure proper alignment, use our own area for huge OS pages static _Atomic(uintptr_t) mi_huge_start; // = 0 -// Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { - if (psize != NULL) *psize = 0; +// Claim an aligned address range for huge pages +static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { + if (total_size != NULL) *total_size = 0; const size_t size = pages * MI_HUGE_OS_PAGE_SIZE; - // Find a new aligned address for the huge pages uintptr_t start = 0; uintptr_t end = 0; uintptr_t expected; do { - start = expected = mi_atomic_read_relaxed(&mi_huge_start); + start = expected = 
mi_atomic_read_relaxed(&mi_huge_start); if (start == 0) { // Initialize the start address after the 32TiB area - start = ((uintptr_t)32 << 40); // 32TiB virtual start address - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages); + start = ((uintptr_t)32 << 40); // 32TiB virtual start address +#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB - #endif +#endif } end = start + size; mi_assert_internal(end % MI_SEGMENT_SIZE == 0); } while (!mi_atomic_cas_strong(&mi_huge_start, end, expected)); - // And allocate - void* p = mi_os_alloc_huge_os_pagesx((void*)start, size, numa_node); - if (p == NULL) { - return NULL; - } - _mi_stat_increase(&_mi_stats_main.committed, size); - _mi_stat_increase(&_mi_stats_main.reserved, size); - if ((uintptr_t)p % MI_SEGMENT_SIZE != 0) { // must be aligned - _mi_warning_message("huge page area was not aligned\n"); - _mi_os_free(p,size,&_mi_stats_main); - return NULL; - } + if (total_size != NULL) *total_size = size; + return (uint8_t*)start; +} + +// Allocate MI_SEGMENT_SIZE aligned huge pages +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize) { + if (psize != NULL) *psize = 0; + if (pages_reserved != NULL) *pages_reserved = 0; + size_t size = 0; + uint8_t* start = mi_os_claim_huge_pages(pages, &size); - if (psize != NULL) *psize = size; - return p; + // Allocate one page at the time but try to place them contiguously + // We allocate one page at the time to be able to abort if it takes too long + // or to at least allocate as many as available on the system. 
+ double start_t = _mi_clock_start(); + size_t page; + for (page = 0; page < pages; page++) { + // allocate a page + bool is_large = true; + void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); + void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); + + // Did we succeed at a contiguous address? + if (p != addr) { + // no success, issue a warning and break + if (p != NULL) { + _mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr); + _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); + } + break; + } + + // success, record it + _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); + _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); + + // check for timeout + double elapsed = _mi_clock_end(start_t); + if (page >= 1) { + double estimate = ((elapsed / (double)(page+1)) * (double)pages); + if (estimate > 1.5*max_secs) { // seems like we are going to timeout, break + elapsed = max_secs + 1.0; + } + } + if (elapsed > max_secs) { + _mi_warning_message("huge page allocation timed out\n"); + break; + } + } + mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); + if (pages_reserved != NULL) *pages_reserved = page; + if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE; + return (page == 0 ? NULL : start); +} + +// free every huge page in a range individually (as we allocated per page) +// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. 
+void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { + if (p==NULL || size==0) return; + uint8_t* base = (uint8_t*)p; + while (size >= MI_HUGE_OS_PAGE_SIZE) { + _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); + size -= MI_HUGE_OS_PAGE_SIZE; + } } /* ---------------------------------------------------------------------------- From 520a8dafee0747e1da8b220b28b35298f10512b2 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:25:28 -0800 Subject: [PATCH 051/293] divide huge pages more even --- src/arena.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index 24fd2114..95a102d1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -385,6 +385,7 @@ static bool mi_arena_add(mi_arena_t* arena) { // reserve at a specific numa node int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { + if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; @@ -422,18 +423,20 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { // pages per numa node int numa_count = _mi_os_numa_node_count(); if (numa_count <= 0) numa_count = 1; - size_t pages_per = pages / numa_count; - if (pages_per == 0) pages_per = 1; + const size_t pages_per = pages / numa_count; + const size_t pages_mod = pages % numa_count; // reserve evenly among numa nodes for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { - int err = mi_reserve_huge_os_pages_at((pages_per > pages ? 
pages : pages_per), numa_node); + size_t node_pages = pages_per; // can be 0 + if (numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node); if (err) return err; - if (pages < pages_per) { + if (pages < node_pages) { pages = 0; } else { - pages -= pages_per; + pages -= node_pages; } } From d1d65fbca4d037c5b9cc0838074804fde1f505c7 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:25:41 -0800 Subject: [PATCH 052/293] make max error messages configurable --- include/mimalloc.h | 1 + src/options.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index b155aca6..c03ddc1e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -277,6 +277,7 @@ typedef enum mi_option_e { mi_option_segment_reset, mi_option_os_tag, mi_option_max_numa_node, + mi_option_max_errors, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 11d12187..63b1612a 100644 --- a/src/options.c +++ b/src/options.c @@ -14,6 +14,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #include // toupper #include +static uintptr_t mi_max_error_count = 16; // stop outputting errors after this + static void mi_add_stderr_output(); int mi_version(void) mi_attr_noexcept { @@ -69,7 +71,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 256, UNINIT, MI_OPTION(max_numa_node) } // maximum allowed numa node + { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; static void mi_option_init(mi_option_desc_t* desc); @@ -86,6 +89,7 @@ void _mi_options_init(void) { _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } } + mi_max_error_count = mi_option_get(mi_option_max_errors); } long mi_option_get(mi_option_t option) { @@ -275,7 +279,7 @@ void _mi_verbose_message(const char* fmt, ...) { void _mi_error_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; + if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, "mimalloc: error: ", fmt, args); @@ -285,7 +289,7 @@ void _mi_error_message(const char* fmt, ...) { void _mi_warning_message(const char* fmt, ...) 
{ if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; + if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args); From 9d6a5acb228db9cd4ae8f50ef2295e9b5d57e3c8 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:34:54 -0800 Subject: [PATCH 053/293] fix unix build warnings --- CMakeLists.txt | 5 +++-- src/arena.c | 2 +- src/heap.c | 2 +- src/os.c | 1 - src/page.c | 2 +- src/segment.c | 6 ++++-- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e96c237..12540f68 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,5 @@ cmake_minimum_required(VERSION 3.0) project(libmimalloc C CXX) -include("cmake/mimalloc-config-version.cmake") -include("CheckIncludeFile") set(CMAKE_C_STANDARD 11) set(CMAKE_CXX_STANDARD 17) @@ -15,6 +13,9 @@ option(MI_SECURE "Use security mitigations (like guard pages and rand option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) +include("cmake/mimalloc-config-version.cmake") +include("CheckIncludeFile") + set(mi_install_dir "lib/mimalloc-${mi_version}") set(mi_sources diff --git a/src/arena.c b/src/arena.c index 95a102d1..08a36415 100644 --- a/src/arena.c +++ b/src/arena.c @@ -429,7 +429,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { // reserve evenly among numa nodes for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if (numa_node < pages_mod) node_pages++; + if ((size_t)numa_node < pages_mod) node_pages++; int err = mi_reserve_huge_os_pages_at(node_pages, numa_node); if (err) return err; if (pages < node_pages) { diff --git a/src/heap.c b/src/heap.c index 15c5d02a..162cf406 100644 
--- a/src/heap.c +++ b/src/heap.c @@ -45,7 +45,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void } -#if MI_DEBUG>1 +#if MI_DEBUG>=3 static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { UNUSED(arg1); UNUSED(arg2); diff --git a/src/os.c b/src/os.c index 5947333d..3f299362 100644 --- a/src/os.c +++ b/src/os.c @@ -914,7 +914,6 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, s size_t page; for (page = 0; page < pages; page++) { // allocate a page - bool is_large = true; void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); diff --git a/src/page.c b/src/page.c index f7fad764..32b68edb 100644 --- a/src/page.c +++ b/src/page.c @@ -38,7 +38,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_stats_t* stats); -#if (MI_DEBUG>1) +#if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { size_t count = 0; while (head != NULL) { diff --git a/src/segment.c b/src/segment.c index dcc6a04b..178e0eda 100644 --- a/src/segment.c +++ b/src/segment.c @@ -41,7 +41,7 @@ terms of the MIT license. 
A copy of the license can be found in the file ----------------------------------------------------------- */ -#if (MI_DEBUG>1) +#if (MI_DEBUG>=3) static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { mi_assert_internal(segment != NULL); mi_segment_t* list = queue->first; @@ -111,7 +111,7 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t Invariant checking ----------------------------------------------------------- */ -#if (MI_DEBUG > 1) +#if (MI_DEBUG>=2) static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); @@ -120,7 +120,9 @@ static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t } return in_queue; } +#endif +#if (MI_DEBUG>=3) static size_t mi_segment_pagesize(mi_segment_t* segment) { return ((size_t)1 << segment->page_shift); } From 8afd06b248f6a82763292821bf5096e35f6a5a0b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 4 Nov 2019 08:44:40 -0800 Subject: [PATCH 054/293] use int64 for time (instead of double) --- include/mimalloc-internal.h | 7 ++- src/arena.c | 4 +- src/memory.c | 1 + src/os.c | 22 ++++++--- src/stats.c | 95 ++++++++++++++++++------------------- 5 files changed, 70 insertions(+), 59 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index c28cf0fd..413f76e6 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -106,8 +106,11 @@ uintptr_t _mi_heap_random(mi_heap_t* heap); // "stats.c" void _mi_stats_done(mi_stats_t* stats); -double _mi_clock_end(double start); -double _mi_clock_start(void); + +typedef int64_t mi_msecs_t; +mi_msecs_t _mi_clock_now(void); +mi_msecs_t _mi_clock_end(mi_msecs_t start); +mi_msecs_t _mi_clock_start(void); // "alloc.c" void* _mi_page_malloc(mi_heap_t* heap, 
mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic` diff --git a/src/arena.c b/src/arena.c index 08a36415..6faf7d3e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -28,7 +28,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar //int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize); +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); int _mi_os_numa_node_count(void); @@ -390,7 +390,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, (double)pages / 2.0, &pages_reserved, &hsize); + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, pages*500, &pages_reserved, &hsize); if (p==NULL || pages_reserved==0) { _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; diff --git a/src/memory.c b/src/memory.c index a425393c..75a1df92 100644 --- a/src/memory.c +++ b/src/memory.c @@ -564,6 +564,7 @@ void _mi_mem_collect(mi_stats_t* stats) { } } + /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ diff --git a/src/os.c b/src/os.c index 3f299362..44ef9830 100644 --- a/src/os.c +++ b/src/os.c @@ -871,6 +871,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } #endif +#if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge 
OS pages static _Atomic(uintptr_t) mi_huge_start; // = 0 @@ -899,18 +900,25 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { if (total_size != NULL) *total_size = size; return (uint8_t*)start; } +#else +static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { + if (total_size != NULL) *total_size = 0; + return NULL; +} +#endif // Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize) { +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) { if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; uint8_t* start = mi_os_claim_huge_pages(pages, &size); + if (start == NULL) return NULL; // or 32-bit systems // Allocate one page at the time but try to place them contiguously // We allocate one page at the time to be able to abort if it takes too long // or to at least allocate as many as available on the system. 
- double start_t = _mi_clock_start(); + mi_msecs_t start_t = _mi_clock_start(); size_t page; for (page = 0; page < pages; page++) { // allocate a page @@ -932,14 +940,14 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, s _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); // check for timeout - double elapsed = _mi_clock_end(start_t); + mi_msecs_t elapsed = _mi_clock_end(start_t); if (page >= 1) { - double estimate = ((elapsed / (double)(page+1)) * (double)pages); - if (estimate > 1.5*max_secs) { // seems like we are going to timeout, break - elapsed = max_secs + 1.0; + mi_msecs_t estimate = ((elapsed / (page+1)) * pages); + if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + elapsed = max_msecs + 1; } } - if (elapsed > max_secs) { + if (elapsed > max_msecs) { _mi_warning_message("huge page allocation timed out\n"); break; } diff --git a/src/stats.c b/src/stats.c index 79362cc4..a1248043 100644 --- a/src/stats.c +++ b/src/stats.c @@ -231,9 +231,9 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin #endif -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); -static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_attr_noexcept { +static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out) mi_attr_noexcept { mi_print_header(out); #if MI_STAT>1 mi_stat_count_t normal = { 0,0,0,0 }; @@ -266,16 +266,16 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_stat_print(&stats->threads, "threads", -1, out); mi_stat_counter_print_avg(&stats->searches, "searches", out); _mi_fprintf(out, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); - if (secs 
>= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs); + if (elapsed > 0) _mi_fprintf(out, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); - double user_time; - double sys_time; + mi_msecs_t user_time; + mi_msecs_t sys_time; size_t peak_rss; size_t page_faults; size_t page_reclaim; size_t peak_commit; mi_process_info(&user_time, &sys_time, &peak_rss, &page_faults, &page_reclaim, &peak_commit); - _mi_fprintf(out,"%10s: user: %.3f s, system: %.3f s, faults: %lu, reclaims: %lu, rss: ", "process", user_time, sys_time, (unsigned long)page_faults, (unsigned long)page_reclaim ); + _mi_fprintf(out,"%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, reclaims: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults, (unsigned long)page_reclaim ); mi_printf_amount((int64_t)peak_rss, 1, out, "%s"); if (peak_commit > 0) { _mi_fprintf(out,", commit charge: "); @@ -284,9 +284,7 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) _mi_fprintf(out,"\n"); } -double _mi_clock_end(double start); -double _mi_clock_start(void); -static double mi_time_start = 0.0; +static mi_msecs_t mi_time_start; // = 0 static mi_stats_t* mi_stats_get_default(void) { mi_heap_t* heap = mi_heap_get_default(); @@ -316,71 +314,72 @@ void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` } -static void mi_stats_print_ex(mi_stats_t* stats, double secs, mi_output_fun* out) { +static void mi_stats_print_ex(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out) { mi_stats_merge_from(stats); - _mi_stats_print(&_mi_stats_main, secs, out); + _mi_stats_print(&_mi_stats_main, elapsed, out); } void mi_stats_print(mi_output_fun* out) mi_attr_noexcept { - mi_stats_print_ex(mi_stats_get_default(),_mi_clock_end(mi_time_start),out); + mi_msecs_t elapsed = _mi_clock_end(mi_time_start); + mi_stats_print_ex(mi_stats_get_default(),elapsed,out); } void mi_thread_stats_print(mi_output_fun* 
out) mi_attr_noexcept { - _mi_stats_print(mi_stats_get_default(), _mi_clock_end(mi_time_start), out); + mi_msecs_t elapsed = _mi_clock_end(mi_time_start); + _mi_stats_print(mi_stats_get_default(), elapsed, out); } - -// -------------------------------------------------------- -// Basic timer for convenience -// -------------------------------------------------------- - +// ---------------------------------------------------------------- +// Basic timer for convenience; use milli-seconds to avoid doubles +// ---------------------------------------------------------------- #ifdef _WIN32 #include -static double mi_to_seconds(LARGE_INTEGER t) { - static double freq = 0.0; - if (freq <= 0.0) { +static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { + static LARGE_INTEGER mfreq; // = 0 + if (mfreq.QuadPart == 0LL) { LARGE_INTEGER f; QueryPerformanceFrequency(&f); - freq = (double)(f.QuadPart); + mfreq.QuadPart = f.QuadPart/1000LL; + if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; } - return ((double)(t.QuadPart) / freq); + return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); } -static double mi_clock_now(void) { +mi_msecs_t _mi_clock_now(void) { LARGE_INTEGER t; QueryPerformanceCounter(&t); - return mi_to_seconds(t); + return mi_to_msecs(t); } #else #include #ifdef CLOCK_REALTIME -static double mi_clock_now(void) { +mi_msecs_t _mi_clock_now(void) { struct timespec t; clock_gettime(CLOCK_REALTIME, &t); - return (double)t.tv_sec + (1.0e-9 * (double)t.tv_nsec); + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); } #else // low resolution timer -static double mi_clock_now(void) { - return ((double)clock() / (double)CLOCKS_PER_SEC); +mi_msecs_t _mi_clock_now(void) { + return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000)); } #endif #endif -static double mi_clock_diff = 0.0; +static mi_msecs_t mi_clock_diff; -double _mi_clock_start(void) { +mi_msecs_t _mi_clock_start(void) { if (mi_clock_diff == 0.0) { - double t0 = mi_clock_now(); - mi_clock_diff = 
mi_clock_now() - t0; + mi_msecs_t t0 = _mi_clock_now(); + mi_clock_diff = _mi_clock_now() - t0; } - return mi_clock_now(); + return _mi_clock_now(); } -double _mi_clock_end(double start) { - double end = mi_clock_now(); +mi_msecs_t _mi_clock_end(mi_msecs_t start) { + mi_msecs_t end = _mi_clock_now(); return (end - start - mi_clock_diff); } @@ -394,21 +393,21 @@ double _mi_clock_end(double start) { #include #pragma comment(lib,"psapi.lib") -static double filetime_secs(const FILETIME* ftime) { +static mi_msecs_t filetime_msecs(const FILETIME* ftime) { ULARGE_INTEGER i; i.LowPart = ftime->dwLowDateTime; i.HighPart = ftime->dwHighDateTime; - double secs = (double)(i.QuadPart) * 1.0e-7; // FILETIME is in 100 nano seconds - return secs; + mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds + return msecs; } -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { FILETIME ct; FILETIME ut; FILETIME st; FILETIME et; GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); - *utime = filetime_secs(&ut); - *stime = filetime_secs(&st); + *utime = filetime_msecs(&ut); + *stime = filetime_msecs(&st); PROCESS_MEMORY_COUNTERS info; GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); @@ -427,11 +426,11 @@ static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size #include #endif -static double timeval_secs(const struct timeval* tv) { - return (double)tv->tv_sec + ((double)tv->tv_usec * 1.0e-6); +static mi_msecs_t timeval_secs(const struct timeval* tv) { + return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); } -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { +static void 
mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { struct rusage rusage; getrusage(RUSAGE_SELF, &rusage); #if defined(__APPLE__) && defined(__MACH__) @@ -452,12 +451,12 @@ static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size #pragma message("define a way to get process info") #endif -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { *peak_rss = 0; *page_faults = 0; *page_reclaim = 0; *peak_commit = 0; - *utime = 0.0; - *stime = 0.0; + *utime = 0; + *stime = 0; } #endif From aece753dce816e6c6967449eef5de5fd30d4a294 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 12:18:20 -0800 Subject: [PATCH 055/293] fix output during preloading enabling stderr only after the crt has loaded --- src/options.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/src/options.c b/src/options.c index a49c46ed..0b3c6c97 100644 --- a/src/options.c +++ b/src/options.c @@ -14,6 +14,8 @@ terms of the MIT license. A copy of the license can be found in the file #include // toupper #include +static void mi_add_stderr_output(); + int mi_version(void) mi_attr_noexcept { return MI_MALLOC_VERSION; } @@ -72,7 +74,9 @@ static mi_option_desc_t options[_mi_option_last] = static void mi_option_init(mi_option_desc_t* desc); void _mi_options_init(void) { - // called on process load + // called on process load; should not be called before the CRT is initialized! + // (e.g. 
do not call this from process_init as that may run before CRT initialization) + mi_add_stderr_output(); // now it safe to use stderr for output for(int i = 0; i < _mi_option_last; i++ ) { mi_option_t option = (mi_option_t)i; mi_option_get(option); // initialize @@ -134,7 +138,7 @@ static void mi_out_stderr(const char* msg) { #ifdef _WIN32 // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. - _cputs(msg); + if (!_mi_preloading()) { _cputs(msg); } #else fputs(msg, stderr); #endif @@ -165,23 +169,29 @@ static void mi_out_buf(const char* msg) { memcpy(&out_buf[start], msg, n); } -static void mi_out_buf_flush(mi_output_fun* out) { +static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { if (out==NULL) return; - // claim all (no more output will be added after this point) - size_t count = mi_atomic_addu(&out_len, MI_MAX_DELAY_OUTPUT); + // claim (if `no_more_buf == true`, no more output will be added after this point) + size_t count = mi_atomic_addu(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); // and output the current contents if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; out(out_buf); + if (!no_more_buf) { + out_buf[count] = '\n'; // if continue with the buffer, insert a newline + } } -// The initial default output, outputs to stderr and the delayed output buffer. + +// Once this module is loaded, switch to this routine +// which outputs to stderr and the delayed output buffer. static void mi_out_buf_stderr(const char* msg) { mi_out_stderr(msg); mi_out_buf(msg); } + // -------------------------------------------------------- // Default output handler // -------------------------------------------------------- @@ -193,14 +203,19 @@ static mi_output_fun* volatile mi_out_default; // = NULL static mi_output_fun* mi_out_get_default(void) { mi_output_fun* out = mi_out_default; - return (out == NULL ? 
&mi_out_buf_stderr : out); + return (out == NULL ? &mi_out_buf : out); } void mi_register_output(mi_output_fun* out) mi_attr_noexcept { mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer - if (out!=NULL) mi_out_buf_flush(out); // output the delayed output now + if (out!=NULL) mi_out_buf_flush(out,true); // output all the delayed output now } +// add stderr to the delayed output after the module is loaded +static void mi_add_stderr_output() { + mi_out_buf_flush(&mi_out_stderr, false); // flush current contents to stderr + mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output +} // -------------------------------------------------------- // Messages, all end up calling `_mi_fputs`. @@ -213,7 +228,7 @@ static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT static mi_decl_thread bool recurse = false; void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { - if (_mi_preloading() || recurse) return; + if (recurse) return; if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(); recurse = true; if (prefix != NULL) out(prefix); @@ -227,7 +242,7 @@ void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { static void mi_vfprintf( mi_output_fun* out, const char* prefix, const char* fmt, va_list args ) { char buf[512]; if (fmt==NULL) return; - if (_mi_preloading() || recurse) return; + if (recurse) return; recurse = true; vsnprintf(buf,sizeof(buf)-1,fmt,args); recurse = false; From c38af8f7c14f33d22832bb24fdacfd41b20ad69f Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:25:41 -0800 Subject: [PATCH 056/293] merge d1d65fbc: make max error messages configurable --- include/mimalloc.h | 1 + src/options.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index b63ed79d..bc817f54 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ 
-271,6 +271,7 @@ typedef enum mi_option_e { mi_option_eager_commit_delay, mi_option_segment_reset, mi_option_os_tag, + mi_option_max_errors, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 0b3c6c97..0bee74e0 100644 --- a/src/options.c +++ b/src/options.c @@ -14,6 +14,8 @@ terms of the MIT license. A copy of the license can be found in the file #include // toupper #include +static uintptr_t mi_max_error_count = 16; // stop outputting errors after this + static void mi_add_stderr_output(); int mi_version(void) mi_attr_noexcept { @@ -68,7 +70,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose + { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; static void mi_option_init(mi_option_desc_t* desc); @@ -85,6 +88,7 @@ void _mi_options_init(void) { _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } } + mi_max_error_count = mi_option_get(mi_option_max_errors); } long mi_option_get(mi_option_t option) { @@ -275,7 +279,7 @@ void _mi_verbose_message(const char* fmt, ...) { void _mi_error_message(const char* fmt, ...) 
{ if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; + if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, "mimalloc: error: ", fmt, args); @@ -285,7 +289,7 @@ void _mi_error_message(const char* fmt, ...) { void _mi_warning_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; + if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args); From 3d0a1e249fa113e93792838a00a7acd9fc98aa34 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 4 Nov 2019 09:40:10 -0800 Subject: [PATCH 057/293] remove all floating point types and arithmetic --- src/arena.c | 1 - src/init.c | 3 +-- src/stats.c | 32 +++++++++++++++++++------------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/arena.c b/src/arena.c index 6faf7d3e..e58d2c47 100644 --- a/src/arena.c +++ b/src/arena.c @@ -25,7 +25,6 @@ with on-demand coalescing. 
// os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -//int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); diff --git a/src/init.c b/src/init.c index ed15aeba..ef848de4 100644 --- a/src/init.c +++ b/src/init.c @@ -433,8 +433,7 @@ static void mi_process_load(void) { } if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - // double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) + size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); mi_reserve_huge_os_pages_interleave(pages); } } diff --git a/src/stats.c b/src/stats.c index a1248043..011fab64 100644 --- a/src/stats.c +++ b/src/stats.c @@ -130,19 +130,23 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const char buf[32]; int len = 32; const char* suffix = (unit <= 0 ? " " : "b"); - double base = (unit == 0 ? 1000.0 : 1024.0); + const int64_t base = (unit == 0 ? 1000 : 1024); if (unit>0) n *= unit; - double pos = (double)(n < 0 ? -n : n); - if (pos < base) - snprintf(buf,len, "%d %s ", (int)n, suffix); - else if (pos < base*base) - snprintf(buf, len, "%.1f k%s", (double)n / base, suffix); - else if (pos < base*base*base) - snprintf(buf, len, "%.1f m%s", (double)n / (base*base), suffix); - else - snprintf(buf, len, "%.1f g%s", (double)n / (base*base*base), suffix); - + const int64_t pos = (n < 0 ? 
-n : n); + if (pos < base) { + snprintf(buf, len, "%d %s ", (int)n, suffix); + } + else { + int64_t divider = base; + const char* magnitude = "k"; + if (pos >= divider*base) { divider *= base; magnitude = "m"; } + if (pos >= divider*base) { divider *= base; magnitude = "g"; } + const int64_t tens = (n / (divider/10)); + const long whole = (long)(tens/10); + const long frac1 = (long)(tens%10); + snprintf(buf, len, "%ld.%ld %s%s", whole, frac1, magnitude, suffix); + } _mi_fprintf(out, (fmt==NULL ? "%11s" : fmt), buf); } @@ -199,8 +203,10 @@ static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg } static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out) { - double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count); - _mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg); + const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count)); + const long avg_whole = (long)(avg_tens/10); + const long avg_frac1 = (long)(avg_tens%10); + _mi_fprintf(out, "%10s: %5ld.%ld avg %ld %ld\n", msg, avg_whole, avg_frac1); } From 829fd872f407c5e201cd844b8f26f2c87915e89b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 4 Nov 2019 11:48:41 -0800 Subject: [PATCH 058/293] initial delay slots --- include/mimalloc-internal.h | 11 ++- include/mimalloc-types.h | 26 +++++-- include/mimalloc.h | 1 + src/heap.c | 2 +- src/init.c | 4 +- src/memory.c | 143 +++++++++++++++++++++++++++++++----- src/options.c | 1 + src/segment.c | 31 ++++---- src/stats.c | 2 +- 9 files changed, 171 insertions(+), 50 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 413f76e6..25a3d93d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -61,15 +61,15 @@ int _mi_os_numa_node_count(void); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); -void 
_mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); +void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld); -bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); +bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); bool _mi_mem_protect(void* addr, size_t size); bool _mi_mem_unprotect(void* addr, size_t size); -void _mi_mem_collect(mi_stats_t* stats); +void _mi_mem_collect(mi_os_tld_t* tld); // "segment.c" mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); @@ -107,7 +107,6 @@ uintptr_t _mi_heap_random(mi_heap_t* heap); // "stats.c" void _mi_stats_done(mi_stats_t* stats); -typedef int64_t mi_msecs_t; mi_msecs_t _mi_clock_now(void); mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 99b6b22b..8a3ffff4 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -385,6 +385,19 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) +// ------------------------------------------------------ +// Delay slots (to avoid expensive OS calls) +// ------------------------------------------------------ +typedef int64_t mi_msecs_t; + +typedef struct mi_delay_slot_s { + mi_msecs_t expire; + uint8_t* addr; + size_t size; +} mi_delay_slot_t; + +#define MI_RESET_DELAY_SLOTS (128) + // ------------------------------------------------------ // Thread Local data // ------------------------------------------------------ @@ -395,6 +408,12 @@ typedef struct 
mi_segment_queue_s { mi_segment_t* last; } mi_segment_queue_t; +// OS thread local data +typedef struct mi_os_tld_s { + size_t region_idx; // start point for next allocation + mi_stats_t* stats; // points to tld stats + mi_delay_slot_t reset_delay[MI_RESET_DELAY_SLOTS]; +} mi_os_tld_t; // Segments thread local data typedef struct mi_segments_tld_s { @@ -408,14 +427,9 @@ typedef struct mi_segments_tld_s { size_t cache_size; // total size of all segments in the cache mi_segment_t* cache; // (small) cache of segments mi_stats_t* stats; // points to tld stats + mi_os_tld_t* os; // points to os stats } mi_segments_tld_t; -// OS thread local data -typedef struct mi_os_tld_s { - size_t region_idx; // start point for next allocation - mi_stats_t* stats; // points to tld stats -} mi_os_tld_t; - // Thread local data struct mi_tld_s { unsigned long long heartbeat; // monotonic heartbeat count diff --git a/include/mimalloc.h b/include/mimalloc.h index c03ddc1e..e6fa9c2b 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -275,6 +275,7 @@ typedef enum mi_option_e { mi_option_reset_decommits, mi_option_eager_commit_delay, mi_option_segment_reset, + mi_option_reset_delay, mi_option_os_tag, mi_option_max_numa_node, mi_option_max_errors, diff --git a/src/heap.c b/src/heap.c index 162cf406..d03925d5 100644 --- a/src/heap.c +++ b/src/heap.c @@ -149,7 +149,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect regions if (collect >= FORCE && _mi_is_main_thread()) { - _mi_mem_collect(&heap->tld->stats); + _mi_mem_collect(&heap->tld->os); } } diff --git a/src/init.c b/src/init.c index ef848de4..971a93c0 100644 --- a/src/init.c +++ b/src/init.c @@ -94,11 +94,12 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) +#define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) static mi_tld_t tld_main = { 0, false, 
&_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments + { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments { 0, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -218,6 +219,7 @@ static bool _mi_heap_init(void) { memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; tld->segments.stats = &tld->stats; + tld->segments.os = &tld->os; tld->os.stats = &tld->stats; _mi_heap_default = heap; } diff --git a/src/memory.c b/src/memory.c index 75a1df92..e12405c1 100644 --- a/src/memory.c +++ b/src/memory.c @@ -53,6 +53,9 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +// local +static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size); + // Constants #if (MI_INTPTR_SIZE==8) @@ -470,16 +473,19 @@ Free -----------------------------------------------------------------------------*/ // Free previously allocated memory with a given id. 
-void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { - mi_assert_internal(size > 0 && stats != NULL); +void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { + mi_assert_internal(size > 0 && tld != NULL); if (p==NULL) return; if (size==0) return; + + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + size_t arena_memid = 0; size_t idx = 0; size_t bitidx = 0; if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) { // was a direct arena allocation, pass through - _mi_arena_free(p, size, arena_memid, stats); + _mi_arena_free(p, size, arena_memid, tld->stats); } else { // allocated in a region @@ -512,14 +518,14 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { (mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments { - _mi_os_reset(p, size, stats); + _mi_os_reset(p, size, tld->stats); // cannot use delay reset! (due to concurrent allocation in the same region) //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? } } } if (!is_eager_committed) { // adjust commit statistics as we commit again when re-using the same slot - _mi_stat_decrease(&stats->committed, mi_good_commit_size(size)); + _mi_stat_decrease(&tld->stats->committed, mi_good_commit_size(size)); } // TODO: should we free empty regions? currently only done _mi_mem_collect. @@ -539,7 +545,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- collection -----------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_stats_t* stats) { +void _mi_mem_collect(mi_os_tld_t* tld) { // free every region that has no segments in use. 
for (size_t i = 0; i < regions_count; i++) { mem_region_t* region = ®ions[i]; @@ -554,7 +560,8 @@ void _mi_mem_collect(mi_stats_t* stats) { bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ion->info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, stats); + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE); + _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); } // and release mi_atomic_write(®ion->info,0); @@ -564,25 +571,123 @@ void _mi_mem_collect(mi_stats_t* stats) { } } +/* ---------------------------------------------------------------------------- + Delay slots +-----------------------------------------------------------------------------*/ + +typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); + +static void mi_delay_insert(mi_delay_slot_t* slots, size_t count, + mi_msecs_t delay, uint8_t* addr, size_t size, + mi_delay_resolve_fun* resolve, void* arg) +{ + if (delay==0) { + resolve(addr, size, arg); + return; + } + + mi_msecs_t now = _mi_clock_now(); + mi_delay_slot_t* oldest = slots; + // walk through all slots, resolving expired ones. + // remember the oldest slot to insert the new entry in. + for (size_t i = 0; i < count; i++) { + mi_delay_slot_t* slot = &slots[i]; + + if (slot->expire == 0) { + // empty slot + oldest = slot; + } + // TODO: should we handle overlapping areas too? 
+ else if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { + // earlier slot encompasses new area, increase expiration + slot->expire = now + delay; + delay = 0; + } + else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { + // new one encompasses old slot, overwrite + slot->expire = now + delay; + slot->addr = addr; + slot->size = size; + delay = 0; + } + else if (slot->expire < now) { + // expired slot, resolve now + slot->expire = 0; + resolve(slot->addr, slot->size, arg); + } + else if (oldest->expire > slot->expire) { + oldest = slot; + } + } + if (delay>0) { + // not yet registered, use the oldest slot + if (oldest->expire > 0) { + resolve(oldest->addr, oldest->size, arg); // evict if not empty + } + oldest->expire = now + delay; + oldest->addr = addr; + oldest->size = size; + } +} + +static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size) +{ + uint8_t* addr = (uint8_t*)p; + bool done = false; + // walk through all slots + for (size_t i = 0; i < count; i++) { + mi_delay_slot_t* slot = &slots[i]; + if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { + // earlier slot encompasses the area; remove it + slot->expire = 0; + done = true; + } + else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { + // new one encompasses old slot, remove it + slot->expire = 0; + } + else if ((addr <= slot->addr && addr + size > slot->addr) || + (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { + // partial overlap, remove slot + mi_assert_internal(false); + slot->expire = 0; + } + } + return done; +} + +static void mi_resolve_reset(void* p, size_t size, void* vtld) { + mi_os_tld_t* tld = (mi_os_tld_t*)vtld; + _mi_os_reset(p, size, tld->stats); +} + +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { + mi_delay_insert(tld->reset_delay, MI_RESET_DELAY_SLOTS, mi_option_get(mi_option_reset_delay), + (uint8_t*)p, size, &mi_resolve_reset, tld); + 
return true; +} + +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + if (!mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, (uint8_t*)p, size)) { + return _mi_os_unreset(p, size, is_zero, tld->stats); + } + return true; +} + + /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_commit(p, size, is_zero, stats); +bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + return _mi_os_commit(p, size, is_zero, tld->stats); } -bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_decommit(p, size, stats); -} - -bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_reset(p, size, stats); -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_unreset(p, size, is_zero, stats); +bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + return _mi_os_decommit(p, size, tld->stats); } bool _mi_mem_protect(void* p, size_t size) { diff --git a/src/options.c b/src/options.c index 63b1612a..e098af0b 100644 --- a/src/options.c +++ b/src/options.c @@ -70,6 +70,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 
256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index 178e0eda..b9abe2b3 100644 --- a/src/segment.c +++ b/src/segment.c @@ -234,7 +234,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_assert_internal(!segment->mem_is_fixed); _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set } - _mi_mem_free(segment, segment_size, segment->memid, tld->stats); + _mi_mem_free(segment, segment_size, segment->memid, tld->os); } @@ -281,7 +281,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) { - _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); + _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->os); } segment->next = tld->cache; tld->cache = segment; @@ -346,13 +346,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) { mi_assert_internal(!segment->mem_is_fixed); - _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->stats); + _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->os); segment->mem_is_committed = true; } if (!segment->mem_is_fixed && (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) { bool reset_zero = false; - _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->stats); + _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->os); if (reset_zero) is_zero = true; } } @@ -365,7 +365,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t 
page_kind, if (!commit) { // ensure the initial info is committed bool commit_zero = false; - _mi_mem_commit(segment, info_size, &commit_zero, tld->stats); + _mi_mem_commit(segment, info_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; } segment->memid = memid; @@ -459,7 +459,7 @@ static bool mi_segment_has_free(const mi_segment_t* segment) { return (segment->used < segment->capacity); } -static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) { +static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_assert_expensive(mi_segment_is_valid(segment)); for (size_t i = 0; i < segment->capacity; i++) { @@ -472,14 +472,14 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) mi_assert_internal(!segment->mem_is_fixed); page->is_committed = true; bool is_zero = false; - _mi_mem_commit(start,psize,&is_zero,stats); + _mi_mem_commit(start,psize,&is_zero,tld->os); if (is_zero) page->is_zero_init = true; } if (page->is_reset) { mi_assert_internal(!segment->mem_is_fixed); page->is_reset = false; bool is_zero = false; - _mi_mem_unreset(start, psize, &is_zero, stats); + _mi_mem_unreset(start, psize, &is_zero, tld->os); if (is_zero) page->is_zero_init = true; } } @@ -497,21 +497,20 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); -static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_stats_t* stats) { - UNUSED(stats); +static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); size_t inuse = page->capacity * page->block_size; - _mi_stat_decrease(&stats->page_committed, inuse); - _mi_stat_decrease(&stats->pages, 1); + 
_mi_stat_decrease(&tld->stats->page_committed, inuse); + _mi_stat_decrease(&tld->stats->pages, 1); // reset the page memory to reduce memory pressure? if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; - _mi_mem_reset(start, psize, stats); + _mi_mem_reset(start, psize, tld->os); } // zero the page data, but not the segment fields @@ -529,7 +528,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) mi_assert_expensive(mi_segment_is_valid(segment)); // mark it as free now - mi_segment_page_clear(segment, page, tld->stats); + mi_segment_page_clear(segment, page, tld); if (segment->used == 0) { // no more used pages; remove from the free list and free the segment @@ -634,7 +633,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen _mi_stat_decrease(&tld->stats->pages_abandoned, 1); if (mi_page_all_free(page)) { // if everything free by now, free the page - mi_segment_page_clear(segment,page,tld->stats); + mi_segment_page_clear(segment,page,tld); } else { // otherwise reclaim it @@ -666,7 +665,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen // Requires that the page has free pages static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); - mi_page_t* page = mi_segment_find_free(segment, tld->stats); + mi_page_t* page = mi_segment_find_free(segment, tld); page->segment_in_use = true; segment->used++; mi_assert_internal(segment->used <= segment->capacity); diff --git a/src/stats.c b/src/stats.c index 011fab64..cb6d8866 100644 --- a/src/stats.c +++ b/src/stats.c @@ -206,7 +206,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* const int64_t avg_tens = (stat->count == 0 ? 
0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); const long avg_frac1 = (long)(avg_tens%10); - _mi_fprintf(out, "%10s: %5ld.%ld avg %ld %ld\n", msg, avg_whole, avg_frac1); + _mi_fprintf(out, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); } From 288726606390edb4ffb9664b9bce0271516b550d Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 6 Nov 2019 14:17:36 -0800 Subject: [PATCH 059/293] optimize get numa node for single node systems --- src/os.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/os.c b/src/os.c index 44ef9830..254f85f1 100644 --- a/src/os.c +++ b/src/os.c @@ -1046,9 +1046,10 @@ int _mi_os_numa_node_count(void) { int _mi_os_numa_node(mi_os_tld_t* tld) { UNUSED(tld); - int numa_node = mi_os_numa_nodex(); - // never more than the node count and >= 0 int numa_count = _mi_os_numa_node_count(); + if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 + // never more than the node count and >= 0 + int numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } if (numa_node < 0) numa_node = 0; return numa_node; From 00e19cad9abd225bb4c0975c4f9b6e440a81b97c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 6 Nov 2019 21:37:23 -0800 Subject: [PATCH 060/293] refactor region code, split out atomic bitmap --- ide/vs2019/mimalloc-override.vcxproj | 2 +- ide/vs2019/mimalloc.vcxproj | 3 +- include/mimalloc-atomic.h | 31 ++- src/bitmap.inc.c | 160 +++++++++++++ src/memory.c | 339 ++++++++++----------------- 5 files changed, 318 insertions(+), 217 deletions(-) create mode 100644 src/bitmap.inc.c diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 09fd37fb..e1c7535c 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -123,7 +123,7 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + 
MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false Default diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 1fabff5e..19696c10 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -116,7 +116,7 @@ true true ../../include - MI_DEBUG=1;%(PreprocessorDefinitions); + MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp false stdcpp17 @@ -218,6 +218,7 @@ + diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index dff0f011..c18f990f 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -36,6 +36,13 @@ static inline void mi_atomic_add64(volatile int64_t* p, int64_t add); // Atomically add a value; returns the previous value. Memory ordering is relaxed. static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add); +// Atomically "and" a value; returns the previous value. Memory ordering is relaxed. +static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x); + +// Atomically "or" a value; returns the previous value. Memory ordering is relaxed. +static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x); + + // Atomically compare and exchange a value; returns `true` if successful. // May fail spuriously. Memory ordering as release on success, and relaxed on failure. 
// (Note: expected and desired are in opposite order from atomic_compare_exchange) @@ -121,22 +128,28 @@ static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exc #include #ifdef _WIN64 typedef LONG64 msc_intptr_t; -#define RC64(f) f##64 +#define MI_64(f) f##64 #else typedef LONG msc_intptr_t; -#define RC64(f) f +#define MI_64(f) f #endif static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { - return (intptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); + return (intptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); +} +static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x); +} +static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { - return (expected == (uintptr_t)RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); + return (expected == (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); } static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { return mi_atomic_cas_strong(p,desired,expected); } static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { - return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); + return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) { return *p; @@ -177,6 +190,14 @@ static inline intptr_t 
mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add MI_USING_STD return atomic_fetch_add_explicit(p, add, memory_order_relaxed); } +static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + MI_USING_STD + return atomic_fetch_and_explicit(p, x, memory_order_relaxed); +} +static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + MI_USING_STD + return atomic_fetch_or_explicit(p, x, memory_order_relaxed); +} static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { MI_USING_STD return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_release, memory_order_relaxed); diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c new file mode 100644 index 00000000..5bea4748 --- /dev/null +++ b/src/bitmap.inc.c @@ -0,0 +1,160 @@ +#pragma once +#ifndef MI_BITMAP_H +#define MI_BITMAP_H + +#include "mimalloc.h" +#include "mimalloc-internal.h" + +// Use bit scan forward to quickly find the first zero bit if it is available +#if defined(_MSC_VER) +#define MI_HAVE_BITSCAN +#include +static inline size_t mi_bsf(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + MI_64(_BitScanForward)(&idx, x); + return idx; +} +static inline size_t mi_bsr(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + MI_64(_BitScanReverse)(&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +#define MI_HAVE_BITSCAN +#if (INTPTR_MAX == LONG_MAX) +# define MI_L(x) x##l +#else +# define MI_L(x) x##ll +#endif +static inline size_t mi_bsf(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x)); +} +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? 
8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x)); +} +#endif + + +#define MI_BITMAP_FIELD_BITS (8*MI_INTPTR_SIZE) +#define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set + +// An atomic bitmap of `uintptr_t` fields +typedef volatile _Atomic(uintptr_t) mi_bitmap_field_t; +typedef mi_bitmap_field_t* mi_bitmap_t; + +// A bitmap index is the index of the bit in a bitmap. +typedef size_t mi_bitmap_index_t; + +// Create a bit index. +static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) { + mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS); + return (idx*MI_BITMAP_FIELD_BITS) + bitidx; +} + +// Get the field index from a bit index. +static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) { + return (bitmap_idx / MI_BITMAP_FIELD_BITS); +} + +// Get the bit index in a bitmap field +static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) { + return (bitmap_idx % MI_BITMAP_FIELD_BITS); +} + +// The bit mask for a given number of blocks at a specified bit index. +static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { + mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); + return ((((uintptr_t)1 << count) - 1) << bitidx); +} + +// Try to atomically claim a sequence of `count` bits in a single field at `idx` in `bitmap`. +// Returns `true` on success. 
+static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +{ + mi_assert_internal(bitmap_idx != NULL); + volatile _Atomic(uintptr_t)* field = &bitmap[idx]; + uintptr_t map = mi_atomic_read(field); + if (map==MI_BITMAP_FIELD_FULL) return false; // short cut + + // search for 0-bit sequence of length count + const uintptr_t mask = mi_bitmap_mask_(count, 0); + const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count; + +#ifdef MI_HAVE_BITSCAN + size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible +#else + size_t bitidx = 0; // otherwise start at 0 +#endif + uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx + + // scan linearly for a free range of zero bits + while (bitidx <= bitidx_max) { + if ((map & m) == 0) { // are the mask bits free at bitidx? + mi_assert_internal((m >> bitidx) == mask); // no overflow? + uintptr_t newmap = map | m; + mi_assert_internal((newmap^map) >> bitidx == mask); + if (!mi_atomic_cas_weak(field, newmap, map)) { // TODO: use strong cas here? + // no success, another thread claimed concurrently.. keep going + map = mi_atomic_read(field); + continue; + } + else { + // success, we claimed the bits! + *bitmap_idx = mi_bitmap_index_create(idx, bitidx); + return true; + } + } + else { + // on to the next bit range +#ifdef MI_HAVE_BITSCAN + size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); + mi_assert_internal(shift > 0 && shift <= count); +#else + size_t shift = 1; +#endif + bitidx += shift; + m <<= shift; + } + } + // no bits found + return false; +} + + +// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. 
+static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { + for (size_t idx = 0; idx < bitmap_fields; idx++) { + if (mi_bitmap_try_claim_field(bitmap, idx, count, bitmap_idx)) { + return true; + } + } + return false; +} + +// Set `count` bits at `bitmap_idx` to 0 atomically +static inline void mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + mi_assert_internal((bitmap[idx] & mask) == mask); + mi_atomic_and(&bitmap[idx], ~mask); +} + + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously +static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + // mi_assert_internal((bitmap[idx] & mask) == 0); + uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); + return ((prev & mask) == 0); +} + +#endif \ No newline at end of file diff --git a/src/memory.c b/src/memory.c index 75a1df92..29e0e412 100644 --- a/src/memory.c +++ b/src/memory.c @@ -37,6 +37,8 @@ Possible issues: #include // memset +#include "bitmap.inc.c" + // Internal raw OS interface size_t _mi_os_large_page_size(); bool _mi_os_protect(void* addr, size_t size); @@ -56,22 +58,22 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo // Constants #if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30)) // 256GiB => 16KiB for the region map 
+#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 16KiB for the region map #elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * (1UL << 30)) // 3GiB => 196 bytes for the region map +#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // 196 bytes for the region map #else #error "define the maximum heap space allowed for regions on this platform" #endif #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_REGION_MAP_BITS (MI_INTPTR_SIZE * 8) -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS) -#define MI_REGION_MAX_ALLOC_SIZE ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE) // 64MiB -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) -#define MI_REGION_MAP_FULL UINTPTR_MAX +#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB +#define MI_REGION_MAX_ALLOC_SIZE (MI_REGION_SIZE/4) // 64MiB +#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) +// Region info is a pointer to the memory region and two bits for +// its flags: is_large, and is_committed. typedef uintptr_t mi_region_info_t; static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) { @@ -88,19 +90,22 @@ static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, b // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. 
typedef struct mem_region_s { - volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block - volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags - volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd + volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) - size_t arena_memid; // if allocated from a (huge page) arena + size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; - // The region map; 16KiB for a 256GiB HEAP_REGION_MAX -// TODO: in the future, maintain a map per NUMA node for numa aware allocation static mem_region_t regions[MI_REGION_MAX]; -static volatile _Atomic(uintptr_t) regions_count; // = 0; // allocated regions +// A bit mask per region for its claimed MI_SEGMENT_SIZE blocks. +static mi_bitmap_field_t regions_map[MI_REGION_MAX]; + +// A bit mask per region to track which blocks are dirty (= potentially written to) +static mi_bitmap_field_t regions_dirty[MI_REGION_MAX]; + +// Allocated regions +static volatile _Atomic(uintptr_t) regions_count; // = 0; /* ---------------------------------------------------------------------------- @@ -113,12 +118,6 @@ static size_t mi_region_block_count(size_t size) { return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE; } -// The bit mask for a given number of blocks at a specified bit index. -static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) { - mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS); - return ((((uintptr_t)1 << blocks) - 1) << bitidx); -} - // Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. 
static size_t mi_good_commit_size(size_t size) { if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; @@ -137,8 +136,8 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { } -static size_t mi_memid_create(size_t idx, size_t bitidx) { - return ((idx*MI_REGION_MAP_BITS) + bitidx)<<1; +static size_t mi_memid_create(mi_bitmap_index_t bitmap_idx) { + return bitmap_idx<<1; } static size_t mi_memid_create_from_arena(size_t arena_memid) { @@ -149,78 +148,57 @@ static bool mi_memid_is_arena(size_t id) { return ((id&1)==1); } -static bool mi_memid_indices(size_t id, size_t* idx, size_t* bitidx, size_t* arena_memid) { +static bool mi_memid_indices(size_t id, mi_bitmap_index_t* bitmap_idx, size_t* arena_memid) { if (mi_memid_is_arena(id)) { *arena_memid = (id>>1); return true; } else { - *idx = ((id>>1) / MI_REGION_MAP_BITS); - *bitidx = ((id>>1) % MI_REGION_MAP_BITS); + *bitmap_idx = (mi_bitmap_index_t)(id>>1); return false; } } /* ---------------------------------------------------------------------------- -Commit from a region + Ensure a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ -// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). 
-static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, - size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) +static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_info_t* pinfo, mi_os_tld_t* tld) { - size_t mask = mi_region_block_mask(blocks,bitidx); - mi_assert_internal(mask != 0); - mi_assert_internal((mask & mi_atomic_read_relaxed(®ion->map)) == mask); - mi_assert_internal(®ions[idx] == region); - // ensure the region is reserved - mi_region_info_t info = mi_atomic_read(®ion->info); - if (info == 0) + mi_region_info_t info = mi_atomic_read(®ions[idx].info); + if (mi_unlikely(info == 0)) { bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); - bool region_large = *allow_large; + bool region_large = allow_large; + bool is_zero = false; size_t arena_memid = 0; - void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, is_zero, &arena_memid, tld); - /* - void* start = NULL; - if (region_large) { - start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN); - if (start != NULL) { region_commit = true; } - } - if (start == NULL) { - start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, ®ion_large, tld); - } - */ - mi_assert_internal(!(region_large && !*allow_large)); + void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld); + mi_assert_internal(!(region_large && !allow_large)); if (start == NULL) { - // failure to allocate from the OS! unclaim the blocks and fail - size_t map; - do { - map = mi_atomic_read_relaxed(®ion->map); - } while (!mi_atomic_cas_weak(®ion->map, map & ~mask, map)); + // failure to allocate from the OS! 
fail + *pinfo = 0; return false; } // set the newly allocated region - info = mi_region_info_create(start,region_large,region_commit); - if (mi_atomic_cas_strong(®ion->info, info, 0)) { + info = mi_region_info_create(start, region_large, region_commit); + if (mi_atomic_cas_strong(®ions[idx].info, info, 0)) { // update the region count - region->arena_memid = arena_memid; - mi_atomic_write(®ion->numa_node, _mi_os_numa_node(tld) + 1); + regions[idx].arena_memid = arena_memid; + mi_atomic_write(®ions[idx].numa_node, _mi_os_numa_node(tld) + 1); + mi_atomic_write(®ions_dirty[idx], is_zero ? 0 : ~((uintptr_t)0)); mi_atomic_increment(®ions_count); } else { // failed, another thread allocated just before us! // we assign it to a later slot instead (up to 4 tries). - for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { + for (size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { regions[idx+i].arena_memid = arena_memid; mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); + mi_atomic_write(®ions_dirty[idx], is_zero ? 
0 : ~((uintptr_t)0)); mi_atomic_increment(®ions_count); start = NULL; break; @@ -232,27 +210,33 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit // _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); } // and continue with the memory at our index - info = mi_atomic_read(®ion->info); + info = mi_atomic_read(®ions[idx].info); } } - mi_assert_internal(info == mi_atomic_read(®ion->info)); + mi_assert_internal(info == mi_atomic_read(®ions[idx].info)); mi_assert_internal(info != 0); + *pinfo = info; + return true; +} + + +/* ---------------------------------------------------------------------------- + Commit blocks +-----------------------------------------------------------------------------*/ + +static void* mi_region_commit_blocks(mi_bitmap_index_t bitmap_idx, mi_region_info_t info, size_t blocks, size_t size, bool* commit, bool* is_large, bool* is_zero, mi_os_tld_t* tld) +{ + // set dirty bits + *is_zero = mi_bitmap_claim(regions_dirty, MI_REGION_MAX, blocks, bitmap_idx); // Commit the blocks to memory bool region_is_committed = false; bool region_is_large = false; - void* start = mi_region_info_read(info,®ion_is_large,®ion_is_committed); - mi_assert_internal(!(region_is_large && !*allow_large)); + void* start = mi_region_info_read(info, ®ion_is_large, ®ion_is_committed); + mi_assert_internal(!(region_is_large && !*is_large)); mi_assert_internal(start!=NULL); - // set dirty bits - uintptr_t m; - do { - m = mi_atomic_read(®ion->dirty_mask); - } while (!mi_atomic_cas_weak(®ion->dirty_mask, m | mask, m)); - *is_zero = ((m & mask) == 0); // no dirty bit set in our claimed range? 
- - void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); + void* blocks_start = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bitmap_idx) * MI_SEGMENT_SIZE); if (*commit && !region_is_committed) { // ensure commit bool commit_zero = false; @@ -266,99 +250,58 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit // and return the allocation mi_assert_internal(blocks_start != NULL); - *allow_large = region_is_large; - *p = blocks_start; - *id = mi_memid_create(idx, bitidx); + *is_large = region_is_large; + return blocks_start; +} + +/* ---------------------------------------------------------------------------- + Claim and allocate blocks in a region +-----------------------------------------------------------------------------*/ + +static bool mi_region_alloc_blocks( + size_t idx, size_t blocks, size_t size, + bool* commit, bool* allow_large, bool* is_zero, + void** p, size_t* id, mi_os_tld_t* tld) +{ + mi_bitmap_index_t bitmap_idx; + if (!mi_bitmap_try_claim_field(regions_map, idx, blocks, &bitmap_idx)) { + return true; // no error, but also no success + } + mi_region_info_t info; + if (!mi_region_ensure_allocated(idx,*allow_large,&info,tld)) { + // failed to allocate region memory, unclaim the bits and fail + mi_bitmap_unclaim(regions_map, MI_REGION_MAX, blocks, bitmap_idx); + return false; + } + *p = mi_region_commit_blocks(bitmap_idx,info,blocks,size,commit,allow_large,is_zero,tld); + *id = mi_memid_create(bitmap_idx); return true; } -// Use bit scan forward to quickly find the first zero bit if it is available -#if defined(_MSC_VER) -#define MI_HAVE_BITSCAN -#include -static inline size_t mi_bsf(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - #if (MI_INTPTR_SIZE==8) - _BitScanForward64(&idx, x); - #else - _BitScanForward(&idx, x); - #endif - return idx; -} -static inline size_t mi_bsr(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - #if (MI_INTPTR_SIZE==8) - 
_BitScanReverse64(&idx, x); - #else - _BitScanReverse(&idx, x); - #endif - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -#define MI_HAVE_BITSCAN -static inline size_t mi_bsf(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : __builtin_ctzl(x)); -} -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x)); -} -#endif -// Allocate `blocks` in a `region` at `idx` of a given `size`. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) -{ - mi_assert_internal(p != NULL && id != NULL); - mi_assert_internal(blocks < MI_REGION_MAP_BITS); +/* ---------------------------------------------------------------------------- + Try to allocate blocks in suitable regions +-----------------------------------------------------------------------------*/ - const uintptr_t mask = mi_region_block_mask(blocks, 0); - const size_t bitidx_max = MI_REGION_MAP_BITS - blocks; - uintptr_t map = mi_atomic_read(®ion->map); - if (map==MI_REGION_MAP_FULL) return true; - - #ifdef MI_HAVE_BITSCAN - size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible - #else - size_t bitidx = 0; // otherwise start at 0 - #endif - uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx - - // scan linearly for a free range of zero bits - while(bitidx <= bitidx_max) { - if ((map & m) == 0) { // are the mask bits free at bitidx? - mi_assert_internal((m >> bitidx) == mask); // no overflow? 
- uintptr_t newmap = map | m; - mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak(®ion->map, newmap, map)) { // TODO: use strong cas here? - // no success, another thread claimed concurrently.. keep going - map = mi_atomic_read(®ion->map); - continue; - } - else { - // success, we claimed the bits - // now commit the block memory -- this can still fail - return mi_region_commit_blocks(region, idx, bitidx, blocks, - size, commit, allow_large, is_zero, p, id, tld); - } - } - else { - // on to the next bit range - #ifdef MI_HAVE_BITSCAN - size_t shift = (blocks == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); - mi_assert_internal(shift > 0 && shift <= blocks); - #else - size_t shift = 1; - #endif - bitidx += shift; - m <<= shift; - } +static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool allow_large ) { + uintptr_t m = mi_atomic_read_relaxed(®ions_map[idx]); + if (m == MI_BITMAP_FIELD_FULL) return false; + if (numa_node >= 0) { // use negative numa node to always succeed + int rnode = ((int)mi_atomic_read_relaxed(®ions->numa_node)) - 1; + if (rnode != numa_node) return false; + } + if (mi_unlikely(!(commit || allow_large))) { + // otherwise skip incompatible regions if possible. + // this is not guaranteed due to multiple threads allocating at the same time but + // that's ok. In secure mode, large is never allowed for any thread, so that works out; + // otherwise we might just not be able to reset/decommit individual pages sometimes. + mi_region_info_t info = mi_atomic_read_relaxed(®ions->info); + bool is_large; + bool is_committed; + void* start = mi_region_info_read(info, &is_large, &is_committed); + bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation? 
+ if (!ok) return false; } - // no error, but also no bits found return true; } @@ -366,33 +309,15 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written // if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_try_alloc_blocks(int numa_node, size_t idx, size_t blocks, size_t size, +static bool mi_region_try_alloc_blocks( + int numa_node, size_t idx, size_t blocks, size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) { // check if there are available blocks in the region.. mi_assert_internal(idx < MI_REGION_MAX); - mem_region_t* region = ®ions[idx]; - uintptr_t m = mi_atomic_read_relaxed(®ion->map); - int rnode = ((int)mi_atomic_read_relaxed(®ion->numa_node)) - 1; - if ((rnode < 0 || rnode == numa_node) && // fits current numa node - (m != MI_REGION_MAP_FULL)) // and some bits are zero - { - bool ok = (*commit || *allow_large); // committing or allow-large is always ok - if (!ok) { - // otherwise skip incompatible regions if possible. - // this is not guaranteed due to multiple threads allocating at the same time but - // that's ok. In secure mode, large is never allowed for any thread, so that works out; - // otherwise we might just not be able to reset/decommit individual pages sometimes. - mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info,&is_large,&is_committed); - ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large)); // Todo: test with one bitmap operation? 
- } - if (ok) { - return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, is_zero, p, id, tld); - } + if (mi_region_is_suitable(numa_node, idx, *commit, *allow_large)) { + return mi_region_alloc_blocks(idx, blocks, size, commit, allow_large, is_zero, p, id, tld); } return true; // no error, but no success either } @@ -426,14 +351,14 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l size = _mi_align_up(size, _mi_os_page_size()); // calculate the number of needed blocks - size_t blocks = mi_region_block_count(size); + const size_t blocks = mi_region_block_count(size); mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); // find a range of free blocks - int numa_node = _mi_os_numa_node(tld); + const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); void* p = NULL; - size_t count = mi_atomic_read(®ions_count); - size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention? + const size_t count = mi_atomic_read(®ions_count); + size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error @@ -456,7 +381,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l *id = mi_memid_create_from_arena(arena_memid); } else { - tld->region_idx = idx; // next start of search? 
currently not used as we use first-fit + tld->region_idx = idx; // next start of search } mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); @@ -475,9 +400,8 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { if (p==NULL) return; if (size==0) return; size_t arena_memid = 0; - size_t idx = 0; - size_t bitidx = 0; - if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) { + mi_bitmap_index_t bitmap_idx; + if (mi_memid_indices(id,&bitmap_idx,&arena_memid)) { // was a direct arena allocation, pass through _mi_arena_free(p, size, arena_memid, stats); } @@ -487,11 +411,11 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // we can align the size up to page size (as we allocate that way too) // this ensures we fully commit/decommit/reset size = _mi_align_up(size, _mi_os_page_size()); - size_t blocks = mi_region_block_count(size); - size_t mask = mi_region_block_mask(blocks, bitidx); + const size_t blocks = mi_region_block_count(size); + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`? mem_region_t* region = ®ions[idx]; - mi_assert_internal((mi_atomic_read_relaxed(®ion->map) & mask) == mask ); // claimed? mi_region_info_t info = mi_atomic_read(®ion->info); bool is_large; bool is_eager_committed; @@ -499,8 +423,8 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { mi_assert_internal(start != NULL); void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS); - if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`? + mi_assert_internal(bitidx + blocks <= MI_BITMAP_FIELD_BITS); + if (blocks_start != p || bitidx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? 
// decommit (or reset) the blocks to reduce the working set. // TODO: implement delayed decommit/reset as these calls are too expensive @@ -526,12 +450,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // this frees up virtual address space which might be useful on 32-bit systems? // and unclaim - uintptr_t map; - uintptr_t newmap; - do { - map = mi_atomic_read_relaxed(®ion->map); - newmap = map & ~mask; - } while (!mi_atomic_cas_weak(®ion->map, newmap, map)); + mi_bitmap_unclaim(regions_map, MI_REGION_MAX, blocks, bitmap_idx); } } @@ -542,23 +461,23 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { void _mi_mem_collect(mi_stats_t* stats) { // free every region that has no segments in use. for (size_t i = 0; i < regions_count; i++) { - mem_region_t* region = ®ions[i]; - if (mi_atomic_read_relaxed(®ion->map) == 0) { + if (mi_atomic_read_relaxed(®ions_map[i]) == 0) { // if no segments used, try to claim the whole region uintptr_t m; do { - m = mi_atomic_read_relaxed(®ion->map); - } while(m == 0 && !mi_atomic_cas_weak(®ion->map, ~((uintptr_t)0), 0 )); + m = mi_atomic_read_relaxed(®ions_map[i]); + } while(m == 0 && !mi_atomic_cas_weak(®ions_map[i], MI_BITMAP_FIELD_FULL, 0 )); if (m == 0) { // on success, free the whole region bool is_eager_committed; - void* start = mi_region_info_read(mi_atomic_read(®ion->info), NULL, &is_eager_committed); + void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, stats); + _mi_arena_free(start, MI_REGION_SIZE, regions[i].arena_memid, stats); } // and release - mi_atomic_write(®ion->info,0); - mi_atomic_write(®ion->map,0); + mi_atomic_write(®ions[i].info,0); + mi_atomic_write(®ions_dirty[i],0); + mi_atomic_write(®ions_map[i],0); } } } From b09282bc0d6e3228c556eac833331438dbe774be Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 6 
Nov 2019 22:49:01 -0800 Subject: [PATCH 061/293] change arena allocator to atomic bitmap as well --- include/mimalloc.h | 4 +- src/arena.c | 268 +++++++++++++-------------------------------- src/bitmap.inc.c | 6 +- src/init.c | 4 +- src/os.c | 20 ++-- 5 files changed, 94 insertions(+), 208 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index c03ddc1e..70b6e412 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -230,8 +230,8 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; mi_decl_export bool mi_is_redirected() mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; // deprecated mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; diff --git a/src/arena.c b/src/arena.c index e58d2c47..b807cd47 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,15 +7,19 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_BLOCK_SIZE, 16MiB). Currently only used to +large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). Currently only used to allocate in one arena consisting of huge OS pages -- otherwise it delegates to direct allocation from the OS. In the future, we can expose an API to manually add more arenas which is sometimes needed for embedded devices or shared memory for example. 
-The arena allocation needs to be thread safe and we use a lock-free scan -with on-demand coalescing. +The arena allocation needs to be thread safe and we use an atomic +bitmap to allocate. The current implementation of the bitmap can +only do this within a field (`uintptr_t`) so we can allocate at most +blocks of 2GiB (64*32MiB) and no object can cross the boundary. This +can lead to fragmentation but fortunately most objects will be regions +of 256MiB in practice. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" @@ -23,6 +27,8 @@ with on-demand coalescing. #include // memset +#include "bitmap.inc.c" // atomic bitmap + // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); void _mi_os_free(void* p, size_t size, mi_stats_t* stats); @@ -36,9 +42,11 @@ int _mi_os_numa_node_count(void); Arena allocation ----------------------------------------------------------- */ -#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_ARENA_BLOCK_SIZE (4*MI_SEGMENT_ALIGN) // 16MiB -#define MI_MAX_ARENAS (64) +#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE +#define MI_ARENA_BLOCK_SIZE (8*MI_SEGMENT_ALIGN) // 32MiB +#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 2GiB +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB +#define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) // Block info: bit 0 contains the `in_use` bit, the upper bits the // size in count of arena blocks. @@ -48,11 +56,13 @@ typedef uintptr_t mi_block_info_t; typedef struct mi_arena_s { uint8_t* start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + size_t field_count; // number of bitmap fields int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? 
bool is_large; // large OS page allocated - _Atomic(uintptr_t) block_bottom; // optimization to start the search for free blocks - _Atomic(mi_block_info_t) blocks[1]; // `block_count` block info's + volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? + mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks } mi_arena_t; @@ -69,180 +79,55 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0 // Use `0` as a special id for direct OS allocated memory. #define MI_MEMID_OS 0 -static size_t mi_memid_create(size_t arena_index, size_t block_index) { +static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { mi_assert_internal(arena_index < 0xFE); - return ((block_index << 8) | ((arena_index+1) & 0xFF)); + return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); } -static void mi_memid_indices(size_t memid, size_t* arena_index, size_t* block_index) { +static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { mi_assert_internal(memid != MI_MEMID_OS); *arena_index = (memid & 0xFF) - 1; - *block_index = (memid >> 8); + *bitmap_index = (memid >> 8); } -/* ----------------------------------------------------------- - Block info ------------------------------------------------------------ */ -static bool mi_block_is_in_use(mi_block_info_t info) { - return ((info&1) != 0); +static size_t mi_arena_block_count_of_size(size_t size) { + const size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); + const size_t bcount = asize / MI_ARENA_BLOCK_SIZE; + return bcount; } -static size_t mi_block_count(mi_block_info_t info) { - return (info>>1); -} - -static mi_block_info_t mi_block_info_create(size_t bcount, bool in_use) { - return (((mi_block_info_t)bcount << 1) | (in_use ? 
1 : 0)); -} - - /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ - -static void* mi_arena_allocx(mi_arena_t* arena, size_t start_idx, size_t end_idx, size_t needed_bcount, bool* is_zero, size_t* block_index) +static void* mi_arena_alloc(mi_arena_t* arena, size_t blocks, bool* is_zero, mi_bitmap_index_t* bitmap_idx) { - // Scan linearly through all block info's - // Skipping used ranges, coalescing free ranges on demand. - mi_assert_internal(needed_bcount > 0); - mi_assert_internal(start_idx <= arena->block_count); - mi_assert_internal(end_idx <= arena->block_count); - _Atomic(mi_block_info_t)* block = &arena->blocks[start_idx]; - _Atomic(mi_block_info_t)* end = &arena->blocks[end_idx]; - while (block < end) { - mi_block_info_t binfo = mi_atomic_read_relaxed(block); - size_t bcount = mi_block_count(binfo); - if (mi_block_is_in_use(binfo)) { - // in-use, skip ahead - mi_assert_internal(bcount > 0); - block += bcount; - } - else { - // free blocks - if (bcount==0) { - // optimization: - // use 0 initialized blocks at the end, to use single atomic operation - // initially to reduce contention (as we don't need to split) - if (block + needed_bcount > end) { - return NULL; // does not fit - } - else if (!mi_atomic_cas_weak(block, mi_block_info_create(needed_bcount, true), binfo)) { - // ouch, someone else was quicker. Try again.. 
- continue; - } - else { - // we got it: return a pointer to the claimed memory - ptrdiff_t idx = (block - arena->blocks); - *is_zero = arena->is_zero_init; - *block_index = idx; - return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); - } - } - - mi_assert_internal(bcount>0); - if (needed_bcount > bcount) { -#if 0 // MI_NO_ARENA_COALESCE - block += bcount; // too small, skip to the next range - continue; -#else - // too small, try to coalesce - _Atomic(mi_block_info_t)* block_next = block + bcount; - if (block_next >= end) { - return NULL; // does not fit - } - mi_block_info_t binfo_next = mi_atomic_read(block_next); - size_t bcount_next = mi_block_count(binfo_next); - if (mi_block_is_in_use(binfo_next)) { - // next block is in use, cannot coalesce - block += (bcount + bcount_next); // skip ahea over both blocks - } - else { - // next block is free, try to coalesce - // first set the next one to being used to prevent dangling ranges - if (!mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, true), binfo_next)) { - // someone else got in before us.. try again - continue; - } - else { - if (!mi_atomic_cas_strong(block, mi_block_info_create(bcount + bcount_next, true), binfo)) { // use strong to increase success chance - // someone claimed/coalesced the block in the meantime - // first free the next block again.. - bool ok = mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, false), binfo_next); // must be strong - mi_assert(ok); UNUSED(ok); - // and try again - continue; - } - else { - // coalesced! try again - // todo: we could optimize here to immediately claim the block if the - // coalesced size is a fit instead of retrying. Keep it simple for now. - continue; - } - } - } -#endif - } - else { // needed_bcount <= bcount - mi_assert_internal(needed_bcount <= bcount); - // it fits, claim the whole block - if (!mi_atomic_cas_weak(block, mi_block_info_create(bcount, true), binfo)) { - // ouch, someone else was quicker. Try again.. 
- continue; - } - else { - // got it, now split off the needed part - if (needed_bcount < bcount) { - mi_atomic_write(block + needed_bcount, mi_block_info_create(bcount - needed_bcount, false)); - mi_atomic_write(block, mi_block_info_create(needed_bcount, true)); - } - // return a pointer to the claimed memory - ptrdiff_t idx = (block - arena->blocks); - *is_zero = false; - *block_index = idx; - return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); - } - } + const size_t fcount = arena->field_count; + size_t idx = mi_atomic_read(&arena->search_idx); // start from last search + for (size_t visited = 0; visited < fcount; visited++, idx++) { + if (idx >= fcount) idx = 0; // wrap around + if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) { + // claimed it! set the dirty bits + *is_zero = mi_bitmap_claim(arena->blocks_dirty, fcount, blocks, *bitmap_idx); + mi_atomic_write(&arena->search_idx, idx); // start search from here next time + return (arena->start + (*bitmap_idx)*MI_ARENA_BLOCK_SIZE); } } - // no success return NULL; } -// Try to reduce search time by starting from bottom and wrap around. 
-static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_zero, size_t* block_index) -{ - uintptr_t bottom = mi_atomic_read_relaxed(&arena->block_bottom); - void* p = mi_arena_allocx(arena, bottom, arena->block_count, needed_bcount, is_zero, block_index); - if (p == NULL && bottom > 0) { - // try again from the start - p = mi_arena_allocx(arena, 0, bottom, needed_bcount, is_zero, block_index); - } - if (p != NULL) { - mi_atomic_write(&arena->block_bottom, *block_index); - } - return p; -} /* ----------------------------------------------------------- Arena Allocation ----------------------------------------------------------- */ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, - size_t* memid) + bool* commit, bool* large, bool* is_zero, size_t* memid) { - size_t block_index = SIZE_MAX; - void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index); + mi_bitmap_index_t bitmap_index; + void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &bitmap_index); if (p != NULL) { - mi_assert_internal(block_index != SIZE_MAX); - #if MI_DEBUG>=1 - _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; - mi_block_info_t binfo = mi_atomic_read(block); - mi_assert_internal(mi_block_is_in_use(binfo)); - mi_assert_internal(mi_block_count(binfo) >= needed_bcount); - #endif - *memid = mi_memid_create(arena_index, block_index); + *memid = mi_memid_create(arena_index, bitmap_index); *commit = true; // TODO: support commit on demand? *large = arena->is_large; } @@ -261,15 +146,13 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if (large==NULL) large = &default_large; // ensure `large != NULL` // try to allocate in an arena if the alignment is small enough - // and if there is not too much waste around the `MI_ARENA_BLOCK_SIZE`. 
- if (alignment <= MI_SEGMENT_ALIGN && - size >= 3*(MI_ARENA_BLOCK_SIZE/4) && // > 12MiB (not more than 25% waste) - !(size > MI_ARENA_BLOCK_SIZE && size < 3*(MI_ARENA_BLOCK_SIZE/2)) // ! <16MiB - 24MiB> - ) + // and the object is not too large or too small. + if (alignment <= MI_SEGMENT_ALIGN && + size <= MI_ARENA_MAX_OBJ_SIZE && + size >= MI_ARENA_MIN_OBJ_SIZE) { - size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); - size_t bcount = asize / MI_ARENA_BLOCK_SIZE; - int numa_node = _mi_os_numa_node(tld); // current numa node + const size_t bcount = mi_arena_block_count_of_size(size); + const int numa_node = _mi_os_numa_node(tld); // current numa node mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); // try numa affine allocation @@ -324,8 +207,8 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { else { // allocated in an arena size_t arena_idx; - size_t block_idx; - mi_memid_indices(memid, &arena_idx, &block_idx); + size_t bitmap_idx; + mi_memid_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); mi_assert_internal(arena != NULL); @@ -333,27 +216,17 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { _mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - mi_assert_internal(arena->block_count > block_idx); - if (arena->block_count <= block_idx) { - _mi_fatal_error("trying to free from non-existent block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx)); + if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) { + _mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - _Atomic(mi_block_info_t)* block = &arena->blocks[block_idx]; - mi_block_info_t binfo = 
mi_atomic_read_relaxed(block); - mi_assert_internal(mi_block_is_in_use(binfo)); - mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); - if (!mi_block_is_in_use(binfo)) { + const size_t blocks = mi_arena_block_count_of_size(size); + bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx); + if (!ones) { _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); return; }; - bool ok = mi_atomic_cas_strong(block, mi_block_info_create(mi_block_count(binfo), false), binfo); - mi_assert_internal(ok); - if (!ok) { - _mi_warning_message("unable to free arena block: %p, info 0x%zx", p, binfo); - } - if (block_idx < mi_atomic_read_relaxed(&arena->block_bottom)) { - mi_atomic_write(&arena->block_bottom, block_idx); - } } } @@ -365,8 +238,7 @@ static bool mi_arena_add(mi_arena_t* arena) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - mi_assert_internal(mi_mem_is_zero(arena->blocks,arena->block_count*sizeof(mi_block_info_t))); - + uintptr_t i = mi_atomic_addu(&mi_arena_count,1); if (i >= MI_MAX_ARENAS) { mi_atomic_subu(&mi_arena_count, 1); @@ -383,40 +255,49 @@ static bool mi_arena_add(mi_arena_t* arena) { #include // ENOMEM // reserve at a specific numa node -int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { +int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, pages*500, &pages_reserved, &hsize); + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize); if (p==NULL || pages_reserved==0) { _mi_warning_message("failed to reserve %zu gb huge pages\n", 
pages); return ENOMEM; } _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); - size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; - size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much + size_t bcount = mi_arena_block_count_of_size(hsize); + size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; + size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); return ENOMEM; } arena->block_count = bcount; - arena->start = (uint8_t*)p; - arena->block_bottom = 0; + arena->field_count = fields; + arena->start = (uint8_t*)p; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; - memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t)); + arena->search_idx = 0; + arena->blocks_dirty = &arena->blocks_map[bcount]; + size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; + if (post > 0) { + // don't use leftover bits at the end + mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); + mi_bitmap_claim(arena->blocks_map, fields, post, postidx); + } + mi_arena_add(arena); return 0; } // reserve huge pages evenly among all numa nodes. 
-int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { +int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept { if (pages == 0) return 0; // pages per numa node @@ -424,12 +305,13 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; + const size_t timeout_per = (timeout_msecs / numa_count) + 50; // reserve evenly among numa nodes for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 if ((size_t)numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, numa_node); + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; @@ -446,7 +328,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages); + int err = mi_reserve_huge_os_pages_interleave(pages, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 5bea4748..aeb185d1 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -135,13 +135,15 @@ static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, } // Set `count` bits at `bitmap_idx` to 0 atomically -static inline void mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { +// Returns `true` if all `count` bits were 1 previously +static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) 
{ const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); mi_assert_internal((bitmap[idx] & mask) == mask); - mi_atomic_and(&bitmap[idx], ~mask); + uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask); + return ((prev & mask) == mask); } diff --git a/src/init.c b/src/init.c index ef848de4..f6d253f9 100644 --- a/src/init.c +++ b/src/init.c @@ -433,8 +433,8 @@ static void mi_process_load(void) { } if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - mi_reserve_huge_os_pages_interleave(pages); + size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); + mi_reserve_huge_os_pages_interleave(pages, pages*500); } } diff --git a/src/os.c b/src/os.c index 254f85f1..027df6ab 100644 --- a/src/os.c +++ b/src/os.c @@ -940,16 +940,18 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); // check for timeout - mi_msecs_t elapsed = _mi_clock_end(start_t); - if (page >= 1) { - mi_msecs_t estimate = ((elapsed / (page+1)) * pages); - if (estimate > 2*max_msecs) { // seems like we are going to timeout, break - elapsed = max_msecs + 1; + if (max_msecs > 0) { + mi_msecs_t elapsed = _mi_clock_end(start_t); + if (page >= 1) { + mi_msecs_t estimate = ((elapsed / (page+1)) * pages); + if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + elapsed = max_msecs + 1; + } + } + if (elapsed > max_msecs) { + _mi_warning_message("huge page allocation timed out\n"); + break; } - } - if (elapsed > max_msecs) { - _mi_warning_message("huge page allocation timed out\n"); - break; } } mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); From 378716c46724d839411166a0bba68b0722cf9d8b Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 7 
Nov 2019 10:26:52 -0800 Subject: [PATCH 062/293] refactor and improve atomic bitmap usage --- CMakeLists.txt | 12 ++- ide/vs2019/mimalloc-override.vcxproj | 3 + ide/vs2019/mimalloc.vcxproj | 4 +- include/mimalloc-internal.h | 11 ++- include/mimalloc-types.h | 10 +-- src/arena.c | 62 +++++++-------- src/bitmap.inc.c | 110 ++++++++++++++++++--------- src/memory.c | 96 +++++++++++------------ src/page.c | 2 + test/test-stress.c | 4 +- 10 files changed, 183 insertions(+), 131 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 12540f68..0726c601 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF) option(MI_SECURE "Use security mitigations (like guard pages and randomization)" OFF) +option(MI_SECURE_FULL "Use full security mitigations (like double free protection, more expensive)" OFF) option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) @@ -70,9 +71,14 @@ if(MI_OVERRIDE MATCHES "ON") endif() endif() -if(MI_SECURE MATCHES "ON") - message(STATUS "Set secure build (MI_SECURE=ON)") - list(APPEND mi_defines MI_SECURE=3) +if(MI_SECURE_FULL MATCHES "ON") + message(STATUS "Set full secure build (experimental) (MI_SECURE_FULL=ON)") + list(APPEND mi_defines MI_SECURE=4) +else() + if(MI_SECURE MATCHES "ON") + message(STATUS "Set secure build (MI_SECURE=ON)") + list(APPEND mi_defines MI_SECURE=3) + endif() endif() if(MI_SEE_ASM MATCHES "ON") diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index e1c7535c..49f3d213 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -232,6 +232,9 @@ + + true + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 19696c10..bae49bab 
100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -218,7 +218,9 @@ - + + true + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 413f76e6..4d8b6a77 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -163,7 +163,6 @@ bool _mi_page_is_valid(mi_page_t* page); // Overflow detecting multiply -#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { #if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 #include // UINT_MAX, ULONG_MAX @@ -175,6 +174,7 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { return __builtin_umulll_overflow(count, size, total); #endif #else /* __builtin_umul_overflow is unavailable */ + #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) *total = count * size; return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count); @@ -188,6 +188,7 @@ static inline bool _mi_is_power_of_two(uintptr_t x) { // Align upwards static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { + mi_assert_internal(alignment != 0); uintptr_t mask = alignment - 1; if ((alignment & mask) == 0) { // power of two? return ((sz + mask) & ~mask); @@ -197,6 +198,12 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { } } +// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. +static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { + mi_assert_internal(divider != 0); + return (divider == 0 ? size : ((size + divider - 1) / divider)); +} + // Is memory zero initialized? 
static inline bool mi_mem_is_zero(void* p, size_t size) { for (size_t i = 0; i < size; i++) { @@ -283,7 +290,7 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE); + mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE); uintptr_t idx = (uintptr_t)diff >> segment->page_shift; mi_assert_internal(idx < segment->capacity); mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 99b6b22b..ced8e7a9 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. #if !defined(MI_SECURE) -#define MI_SECURE 0 +#define MI_SECURE 4 #endif // Define MI_DEBUG for debug mode @@ -93,12 +93,12 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4mb // Derived constants -#define MI_SEGMENT_SIZE (1<= MI_ARENA_BLOCK_SIZE, 32MiB). Currently only used to -allocate in one arena consisting of huge OS pages -- otherwise it -delegates to direct allocation from the OS. +large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). +In contrast to the rest of mimalloc, the arenas are shared between +threads and need to be accessed using atomic operations. -In the future, we can expose an API to manually add more arenas which -is sometimes needed for embedded devices or shared memory for example. +Currently arenas are only used to for huge OS page (1GiB) reservations, +otherwise it delegates to direct allocation from the OS. 
+In the future, we can expose an API to manually add more kinds of arenas +which is sometimes needed for embedded devices or shared memory for example. +(We can also employ this with WASI or `sbrk` systems to reserve large arenas + on demand and be able to reuse them efficiently). The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. The current implementation of the bitmap can @@ -48,10 +52,6 @@ int _mi_os_numa_node_count(void); #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) -// Block info: bit 0 contains the `in_use` bit, the upper bits the -// size in count of arena blocks. -typedef uintptr_t mi_block_info_t; - // A memory arena descriptor typedef struct mi_arena_s { uint8_t* start; // the start of the memory area @@ -61,8 +61,8 @@ typedef struct mi_arena_s { bool is_zero_init; // is the arena zero initialized? bool is_large; // large OS page allocated volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks - mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? - mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? + mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks } mi_arena_t; @@ -81,6 +81,7 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0 static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { mi_assert_internal(arena_index < 0xFE); + mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? 
return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); } @@ -90,30 +91,25 @@ static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_ *bitmap_index = (memid >> 8); } - -static size_t mi_arena_block_count_of_size(size_t size) { - const size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); - const size_t bcount = asize / MI_ARENA_BLOCK_SIZE; - return bcount; +static size_t mi_block_count_of_size(size_t size) { + return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); } /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ -static void* mi_arena_alloc(mi_arena_t* arena, size_t blocks, bool* is_zero, mi_bitmap_index_t* bitmap_idx) +static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { const size_t fcount = arena->field_count; size_t idx = mi_atomic_read(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) { - // claimed it! set the dirty bits - *is_zero = mi_bitmap_claim(arena->blocks_dirty, fcount, blocks, *bitmap_idx); mi_atomic_write(&arena->search_idx, idx); // start search from here next time - return (arena->start + (*bitmap_idx)*MI_ARENA_BLOCK_SIZE); + return true; } } - return NULL; + return false; } @@ -125,13 +121,15 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n bool* commit, bool* large, bool* is_zero, size_t* memid) { mi_bitmap_index_t bitmap_index; - void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &bitmap_index); - if (p != NULL) { - *memid = mi_memid_create(arena_index, bitmap_index); - *commit = true; // TODO: support commit on demand? - *large = arena->is_large; + if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) { + // claimed it! 
set the dirty bits (todo: no need for an atomic op here?) + *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index); + *memid = mi_memid_create(arena_index, bitmap_index); + *commit = true; // TODO: support commit on demand? + *large = arena->is_large; + return (arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE)); } - return p; + return NULL; } void* _mi_arena_alloc_aligned(size_t size, size_t alignment, @@ -140,7 +138,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, { mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); - *memid = MI_MEMID_OS; + *memid = MI_MEMID_OS; *is_zero = false; bool default_large = false; if (large==NULL) large = &default_large; // ensure `large != NULL` @@ -151,7 +149,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size <= MI_ARENA_MAX_OBJ_SIZE && size >= MI_ARENA_MIN_OBJ_SIZE) { - const size_t bcount = mi_arena_block_count_of_size(size); + const size_t bcount = mi_block_count_of_size(size); const int numa_node = _mi_os_numa_node(tld); // current numa node mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); @@ -221,7 +219,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { _mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - const size_t blocks = mi_arena_block_count_of_size(size); + const size_t blocks = mi_block_count_of_size(size); bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx); if (!ones) { _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); @@ -268,7 +266,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec } _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); - size_t bcount = mi_arena_block_count_of_size(hsize); + size_t bcount = mi_block_count_of_size(hsize); size_t fields = (bcount + 
MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? @@ -284,6 +282,8 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec arena->is_zero_init = true; arena->search_idx = 0; arena->blocks_dirty = &arena->blocks_map[bcount]; + // the bitmaps are already zero initialized due to os_alloc + // just claim leftover blocks if needed size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; if (post > 0) { // don't use leftover bits at the end diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index aeb185d1..19e6bbb8 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -1,41 +1,30 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +This file is meant to be included in other files for efficiency. +It implements a bitmap that can set/reset sequences of bits atomically +and is used to concurrently claim memory ranges. + +A bitmap is an array of fields where each field is a machine word (`uintptr_t`) + +A current limitation is that the bit sequences cannot cross fields +and that the sequence must be smaller or equal to the bits in a field. 
+---------------------------------------------------------------------------- */ #pragma once -#ifndef MI_BITMAP_H -#define MI_BITMAP_H +#ifndef MI_BITMAP_C +#define MI_BITMAP_C #include "mimalloc.h" #include "mimalloc-internal.h" -// Use bit scan forward to quickly find the first zero bit if it is available -#if defined(_MSC_VER) -#define MI_HAVE_BITSCAN -#include <intrin.h> -static inline size_t mi_bsf(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - MI_64(_BitScanForward)(&idx, x); - return idx; -} -static inline size_t mi_bsr(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - MI_64(_BitScanReverse)(&idx, x); - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -#define MI_HAVE_BITSCAN -#if (INTPTR_MAX == LONG_MAX) -# define MI_L(x) x##l -#else -# define MI_L(x) x##ll -#endif -static inline size_t mi_bsf(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x)); -} -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x)); -} -#endif - +/* ----------------------------------------------------------- + Bitmap definition +----------------------------------------------------------- */ #define MI_BITMAP_FIELD_BITS (8*MI_INTPTR_SIZE) #define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set @@ -63,14 +52,59 @@ static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) return (bitmap_idx % MI_BITMAP_FIELD_BITS); } +// Get the full bit index +static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { + return bitmap_idx; +} + + // The bit mask for a given number of blocks at a specified bit index. static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); return ((((uintptr_t)1 << count) - 1) << bitidx); } -// Try to atomically claim a sequence of `count` bits in a single field at `idx` in `bitmap`. -// Returns `true` on success.
+ +/* ----------------------------------------------------------- + Use bit scan forward/reverse to quickly find the first zero bit if it is available +----------------------------------------------------------- */ +#if defined(_MSC_VER) +#define MI_HAVE_BITSCAN +#include <intrin.h> +static inline size_t mi_bsf(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + MI_64(_BitScanForward)(&idx, x); + return idx; +} +static inline size_t mi_bsr(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + MI_64(_BitScanReverse)(&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +#include <limits.h> // LONG_MAX +#define MI_HAVE_BITSCAN +#if (INTPTR_MAX == LONG_MAX) +# define MI_L(x) x##l +#else +# define MI_L(x) x##ll +#endif +static inline size_t mi_bsf(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x)); +} +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x)); +} +#endif + +/* ----------------------------------------------------------- + Claim a bit sequence atomically +----------------------------------------------------------- */ + +// Try to atomically claim a sequence of `count` bits in a single +// field at `idx` in `bitmap`. Returns `true` on success. static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); @@ -93,7 +127,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con while (bitidx <= bitidx_max) { if ((map & m) == 0) { // are the mask bits free at bitidx? mi_assert_internal((m >> bitidx) == mask); // no overflow? - uintptr_t newmap = map | m; + const uintptr_t newmap = map | m; mi_assert_internal((newmap^map) >> bitidx == mask); if (!mi_atomic_cas_weak(field, newmap, map)) { // TODO: use strong cas here? // no success, another thread claimed concurrently..
keep going @@ -109,10 +143,10 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con else { // on to the next bit range #ifdef MI_HAVE_BITSCAN - size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); + const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); mi_assert_internal(shift > 0 && shift <= count); #else - size_t shift = 1; + const size_t shift = 1; #endif bitidx += shift; m <<= shift; diff --git a/src/memory.c b/src/memory.c index 29e0e412..bdbf1e48 100644 --- a/src/memory.c +++ b/src/memory.c @@ -16,10 +16,10 @@ We need this memory layer between the raw OS calls because of: 1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order to reuse memory effectively. 2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses in that - object :-( (`malloc-large` tests this). This means we need a cheaper way to - reuse memory. -3. This layer can help with a NUMA aware allocation in the future. + an OS allocation/free is still (much) too expensive relative to the accesses + in that object :-( (`malloc-large` tests this). This means we need a cheaper + way to reuse memory. +3. This layer allows for NUMA aware allocation. 
Possible issues: - (2) can potentially be addressed too with a small cache per thread which is much @@ -47,8 +47,6 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -//void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -//void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); // arena.c void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); @@ -58,18 +56,18 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo // Constants #if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 16KiB for the region map +#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 40KiB for the region map #elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // 196 bytes for the region map +#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map #else #error "define the maximum heap space allowed for regions on this platform" #endif #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB +#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) #define MI_REGION_MAX_ALLOC_SIZE (MI_REGION_SIZE/4) // 64MiB -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) +#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) // Region info is a pointer to the memory region and two bits for @@ -95,7 +93,7 @@ typedef struct mem_region_s { size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; -// The region map; 16KiB for a 256GiB HEAP_REGION_MAX +// The region map static mem_region_t regions[MI_REGION_MAX]; // A bit mask per region for its claimed 
MI_SEGMENT_SIZE blocks. @@ -173,7 +171,7 @@ static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_i bool region_large = allow_large; bool is_zero = false; size_t arena_memid = 0; - void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld); + void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld); mi_assert_internal(!(region_large && !allow_large)); if (start == NULL) { @@ -183,35 +181,31 @@ static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_i } // set the newly allocated region + // try to initialize any region up to 4 beyond the current one in + // care multiple threads are doing this concurrently (common at startup) info = mi_region_info_create(start, region_large, region_commit); - if (mi_atomic_cas_strong(&regions[idx].info, info, 0)) { - // update the region count - regions[idx].arena_memid = arena_memid; - mi_atomic_write(&regions[idx].numa_node, _mi_os_numa_node(tld) + 1); - mi_atomic_write(&regions_dirty[idx], is_zero ? 0 : ~((uintptr_t)0)); - mi_atomic_increment(&regions_count); - } - else { - // failed, another thread allocated just before us! - // we assign it to a later slot instead (up to 4 tries). - for (size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { - if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) { - regions[idx+i].arena_memid = arena_memid; - mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); - mi_atomic_write(&regions_dirty[idx], is_zero ? 0 : ~((uintptr_t)0)); - mi_atomic_increment(&regions_count); - start = NULL; - break; - } + bool claimed = false; + for (size_t i = 0; i <= 4 && idx + i < MI_REGION_MAX && !claimed; i++) { + if (!is_zero) { + // set dirty bits before CAS; this might race with a zero block but that is ok.
+ // (but writing before cas prevents a concurrent allocation to assume it is not dirty) + mi_atomic_write(&regions_dirty[idx+i], MI_BITMAP_FIELD_FULL); } - if (start != NULL) { - // free it if we didn't succeed to save it to some other region - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); - // _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); + if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) { + // claimed! + regions[idx+i].arena_memid = arena_memid; + mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); + mi_atomic_increment(&regions_count); + claimed = true; } - // and continue with the memory at our index - info = mi_atomic_read(&regions[idx].info); } + if (!claimed) { + // free our OS allocation if we didn't succeed to store it in some region + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); + } + // continue with the actual info at our index in case another thread was quicker with the allocation + info = mi_atomic_read(&regions[idx].info); + mi_assert_internal(info != 0); } mi_assert_internal(info == mi_atomic_read(&regions[idx].info)); mi_assert_internal(info != 0); @@ -290,19 +284,21 @@ static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool a int rnode = ((int)mi_atomic_read_relaxed(&regions->numa_node)) - 1; if (rnode != numa_node) return false; } - if (mi_unlikely(!(commit || allow_large))) { - // otherwise skip incompatible regions if possible. - // this is not guaranteed due to multiple threads allocating at the same time but - // that's ok. In secure mode, large is never allowed for any thread, so that works out; - // otherwise we might just not be able to reset/decommit individual pages sometimes.
- mi_region_info_t info = mi_atomic_read_relaxed(&regions->info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info, &is_large, &is_committed); - bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation? - if (!ok) return false; - } - return true; + if (commit && allow_large) return true; // always ok + + // otherwise skip incompatible regions if possible. + // this is not guaranteed due to multiple threads allocating at the same time but + // that's ok. In secure mode, large is never allowed for any thread, so that works out; + // otherwise we might just not be able to reset/decommit individual pages sometimes. + mi_region_info_t info = mi_atomic_read_relaxed(&regions->info); + bool is_large; + bool is_committed; + void* start = mi_region_info_read(info, &is_large, &is_committed); + // note: we also skip if commit is false and the region is committed, + // that is a bit strong but prevents allocation of eager delayed segments in + // committed memory + bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation? + return ok; } // Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim.
diff --git a/src/page.c b/src/page.c index 32b68edb..c5b6e370 100644 --- a/src/page.c +++ b/src/page.c @@ -497,8 +497,10 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats) { UNUSED(stats); + #if (MI_SECURE <= 2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); + #endif mi_assert_internal(page->capacity + extend <= page->reserved); void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); size_t bsize = page->block_size; diff --git a/test/test-stress.c b/test/test-stress.c index bb428072..d80cb1a4 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -66,7 +66,9 @@ static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) items *= 100; // 1% huge objects; if (items==40) items++; // pthreads uses that size for stack increases uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); - for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + if (p != NULL) { + for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + } return p; } From 31d11f64d581abfd28818be65f3780506977d889 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 7 Nov 2019 10:33:45 -0800 Subject: [PATCH 063/293] fix secure free list extension where a non-empty initial free list was discarded --- include/mimalloc-types.h | 4 ++-- src/page.c | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 99b6b22b..893dcd67 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -26,7 +26,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // #define MI_SECURE 1 // guard page around metadata // #define MI_SECURE 2 // guard page around each mimalloc page // #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free) -// #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. +// #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. (cmake -DMI_SECURE_FULL=ON) #if !defined(MI_SECURE) #define MI_SECURE 0 @@ -35,7 +35,7 @@ terms of the MIT license. A copy of the license can be found in the file // Define MI_DEBUG for debug mode // #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free. // #define MI_DEBUG 2 // + internal assertion checks -// #define MI_DEBUG 3 // + extensive internal invariant checking +// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_CHECK_FULL=ON) #if !defined(MI_DEBUG) #if !defined(NDEBUG) || defined(_DEBUG) #define MI_DEBUG 2 diff --git a/src/page.c b/src/page.c index f7fad764..cb3a4bf8 100644 --- a/src/page.c +++ b/src/page.c @@ -455,8 +455,8 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si while ((extend >> shift) == 0) { shift--; } - size_t slice_count = (size_t)1U << shift; - size_t slice_extend = extend / slice_count; + const size_t slice_count = (size_t)1U << shift; + const size_t slice_extend = extend / slice_count; mi_assert_internal(slice_extend >= 1); mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice size_t counts[MI_MAX_SLICES]; // available objects in the slice @@ -470,7 +470,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si // set up first element size_t current = _mi_heap_random(heap) % slice_count; counts[current]--; - page->free = blocks[current]; + mi_block_t* const free_start = blocks[current]; // and iterate through the rest 
uintptr_t rnd = heap->random; for (size_t i = 1; i < extend; i++) { @@ -490,7 +490,9 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next` current = next; } - mi_block_set_next(page, blocks[current], NULL); // end of the list + // prepend to the free list (usually NULL) + mi_block_set_next(page, blocks[current], page->free); // end of the list + page->free = free_start; heap->random = _mi_random_shuffle(rnd); } From 27f1a8b3d24acf0ff0bcbdacfbecd21437fb450e Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 7 Nov 2019 10:35:30 -0800 Subject: [PATCH 064/293] fix avg display; set secure default to 0` --- include/mimalloc-types.h | 2 +- src/stats.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index ddbe72f3..3f5e4e27 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. (cmake -DMI_SECURE_FULL=ON) #if !defined(MI_SECURE) -#define MI_SECURE 4 +#define MI_SECURE 0 #endif // Define MI_DEBUG for debug mode diff --git a/src/stats.c b/src/stats.c index 011fab64..cb6d8866 100644 --- a/src/stats.c +++ b/src/stats.c @@ -206,7 +206,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* const int64_t avg_tens = (stat->count == 0 ? 
0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); const long avg_frac1 = (long)(avg_tens%10); - _mi_fprintf(out, "%10s: %5ld.%ld avg %ld %ld\n", msg, avg_whole, avg_frac1); + _mi_fprintf(out, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); } From 9b6538880768ccbe0dde86cfc0018a7b035e7911 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 7 Nov 2019 10:59:19 -0800 Subject: [PATCH 065/293] fix space leak in secure mode where a non-null free list would be discarded --- src/page.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/page.c b/src/page.c index cb3a4bf8..aaf1cb91 100644 --- a/src/page.c +++ b/src/page.c @@ -439,15 +439,15 @@ void _mi_page_retire(mi_page_t* page) { #define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT) #define MI_MIN_SLICES (2) -static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) { +static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t extend, mi_stats_t* const stats) { UNUSED(stats); #if (MI_SECURE<=2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); #endif mi_assert_internal(page->capacity + extend <= page->reserved); - void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL); - size_t bsize = page->block_size; + void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL); + const size_t bsize = page->block_size; // initialize a randomized free list // set up `slice_count` slices to alternate between @@ -475,7 +475,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si uintptr_t rnd = heap->random; for (size_t i = 1; i < extend; i++) { // call random_shuffle only every INTPTR_SIZE rounds - size_t round = i%MI_INTPTR_SIZE; + const size_t round = i%MI_INTPTR_SIZE; if (round == 0) rnd = _mi_random_shuffle(rnd); // select a random next slice index size_t next = ((rnd >> 
8*round) & (slice_count-1)); @@ -485,7 +485,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si } // and link the current block to it counts[next]--; - mi_block_t* block = blocks[current]; + mi_block_t* const block = blocks[current]; blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next` current = next; @@ -496,25 +496,28 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si heap->random = _mi_random_shuffle(rnd); } -static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats) +static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) { UNUSED(stats); + #if (MI_SECURE <= 2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); + #endif mi_assert_internal(page->capacity + extend <= page->reserved); - void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); - size_t bsize = page->block_size; - mi_block_t* start = mi_page_block_at(page, page_area, page->capacity); + void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); + const size_t bsize = page->block_size; + mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity); // initialize a sequential free list - mi_block_t* last = mi_page_block_at(page, page_area, page->capacity + extend - 1); + mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); mi_block_t* block = start; while(block <= last) { mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); mi_block_set_next(page,block,next); block = next; } - mi_block_set_next(page, last, NULL); + // prepend to free list (usually `NULL`) + mi_block_set_next(page, last, page->free); page->free = start; } From 56887aeb2f75d0ade86120e448e66a2684c920ff Mon 
Sep 17 00:00:00 2001 From: daan Date: Thu, 7 Nov 2019 10:59:45 -0800 Subject: [PATCH 066/293] add MI_SECURE_FULL=ON as a cmake option to include double free mitigation --- CMakeLists.txt | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 81cc339a..59d889b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF) option(MI_SECURE "Use security mitigations (like guard pages and randomization)" OFF) +option(MI_SECURE_FULL "Use full security mitigations, may be more expensive (includes double-free mitigation)" OFF) option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) @@ -66,9 +67,15 @@ if(MI_OVERRIDE MATCHES "ON") endif() endif() -if(MI_SECURE MATCHES "ON") - message(STATUS "Set secure build (MI_SECURE=ON)") - list(APPEND mi_defines MI_SECURE=3) +if(MI_SECURE_FULL MATCHES "ON") + message(STATUS "Set full secure build (may be more expensive) (MI_SECURE_FULL=ON)") + list(APPEND mi_defines MI_SECURE=4) + set(MI_SECURE "ON") +else() + if(MI_SECURE MATCHES "ON") + message(STATUS "Set secure build (MI_SECURE=ON)") + list(APPEND mi_defines MI_SECURE=3) + endif() endif() if(MI_SEE_ASM MATCHES "ON") From 13f5e6e43e9aae4043d9acc94fac67746fcd9bb4 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 7 Nov 2019 18:09:30 -0800 Subject: [PATCH 067/293] fix numa node check in regions --- src/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index bdbf1e48..fb3f5093 100644 --- a/src/memory.c +++ b/src/memory.c @@ -282,7 +282,7 @@ static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool a if (m == MI_BITMAP_FIELD_FULL) return false; if (numa_node >= 0) { // 
use negative numa node to always succeed int rnode = ((int)mi_atomic_read_relaxed(&regions->numa_node)) - 1; - if (rnode != numa_node) return false; + if (rnode >= 0 && rnode != numa_node) return false; } if (commit && allow_large) return true; // always ok From 7b72a4cd50782563104e28becb7e181e8978449f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 8 Nov 2019 11:55:43 -0800 Subject: [PATCH 068/293] fix region suitable bug --- src/memory.c | 6 +++--- test/test-stress.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/memory.c b/src/memory.c index bdbf1e48..f8798d99 100644 --- a/src/memory.c +++ b/src/memory.c @@ -281,8 +281,8 @@ static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool a uintptr_t m = mi_atomic_read_relaxed(&regions_map[idx]); if (m == MI_BITMAP_FIELD_FULL) return false; if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = ((int)mi_atomic_read_relaxed(&regions->numa_node)) - 1; - if (rnode != numa_node) return false; + int rnode = ((int)mi_atomic_read_relaxed(&regions[idx].numa_node)) - 1; + if (rnode >= 0 && rnode != numa_node) return false; } if (commit && allow_large) return true; // always ok @@ -290,7 +290,7 @@ static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool a // this is not guaranteed due to multiple threads allocating at the same time but // that's ok. In secure mode, large is never allowed for any thread, so that works out; // otherwise we might just not be able to reset/decommit individual pages sometimes. - mi_region_info_t info = mi_atomic_read_relaxed(&regions->info); + mi_region_info_t info = mi_atomic_read_relaxed(&regions[idx].info); bool is_large; bool is_committed; void* start = mi_region_info_read(info, &is_large, &is_committed); diff --git a/test/test-stress.c b/test/test-stress.c index d80cb1a4..be2a9c67 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -18,7 +18,7 @@ terms of the MIT license. 
// argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 20; // scaling factor +static int N = 40; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor From f0e02bab0344e099fe491eb24690a0b9a08cf6e0 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 8 Nov 2019 12:22:03 -0800 Subject: [PATCH 069/293] pr #168 by @zerodefect to update the install location --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 59d889b8..7b455881 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ option(MI_SECURE_FULL "Use full security mitigations, may be more expensiv option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) -set(mi_install_dir "lib/mimalloc-${mi_version}") +set(mi_install_dir "${CMAKE_INSTALL_PREFIX}/lib/mimalloc-${mi_version}") set(mi_sources src/stats.c From 9f08ddd0d0d2909998d71bf6da9bce2b048d851e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 9 Nov 2019 19:30:53 -0800 Subject: [PATCH 070/293] refactor regions; add commit tracking on a segment basis --- src/arena.c | 9 +- src/bitmap.inc.c | 14 +- src/memory.c | 382 ++++++++++++++++++++--------------------------- src/segment.c | 2 +- 4 files changed, 181 insertions(+), 226 deletions(-) diff --git a/src/arena.c b/src/arena.c index 8feec89f..1b6cf4a4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -123,7 +123,7 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n mi_bitmap_index_t bitmap_index; if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) { // claimed it! set the dirty bits (todo: no need for an atomic op here?) 
- *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index); + *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); *memid = mi_memid_create(arena_index, bitmap_index); *commit = true; // TODO: support commit on demand? *large = arena->is_large; @@ -181,7 +181,10 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, // finally, fall back to the OS *is_zero = true; - *memid = MI_MEMID_OS; + *memid = MI_MEMID_OS; + if (*large) { + *large = mi_option_is_enabled(mi_option_large_os_pages); // try large OS pages only if enabled and allowed + } return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); } @@ -288,7 +291,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - mi_bitmap_claim(arena->blocks_map, fields, post, postidx); + mi_bitmap_claim(arena->blocks_map, fields, post, postidx, NULL); } mi_arena_add(arena); diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 19e6bbb8..3847e712 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -61,6 +61,7 @@ static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { // The bit mask for a given number of blocks at a specified bit index. 
static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); + if (count == MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; return ((((uintptr_t)1 << count) - 1) << bitidx); } @@ -183,14 +184,25 @@ static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, s // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously -static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { +static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); // mi_assert_internal((bitmap[idx] & mask) == 0); uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); + if (any_zero != NULL) *any_zero = ((prev & mask) != mask); return ((prev & mask) == 0); } +// Returns `true` if all `count` bits were 1 +static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + // mi_assert_internal((bitmap[idx] & mask) == 0); + return ((mi_atomic_read(&bitmap[idx]) & mask) == mask); +} + #endif \ No newline at end of file diff --git a/src/memory.c b/src/memory.c index f8798d99..a1f94e18 100644 --- a/src/memory.c +++ b/src/memory.c @@ -65,10 +65,11 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE +#define MI_REGION_MAX_BLOCKS 
MI_BITMAP_FIELD_BITS #define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) -#define MI_REGION_MAX_ALLOC_SIZE (MI_REGION_SIZE/4) // 64MiB #define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) - +#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB +#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) // Region info is a pointer to the memory region and two bits for // its flags: is_large, and is_committed. @@ -88,20 +89,16 @@ static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, b // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. typedef struct mem_region_s { - volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) - volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) - size_t arena_memid; // if allocated from a (huge page) arena + volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) + volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) + mi_bitmap_field_t in_use; + mi_bitmap_field_t dirty; + size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; // The region map static mem_region_t regions[MI_REGION_MAX]; -// A bit mask per region for its claimed MI_SEGMENT_SIZE blocks. -static mi_bitmap_field_t regions_map[MI_REGION_MAX]; - -// A bit mask per region to track which blocks are dirty (= potentially written to) -static mi_bitmap_field_t regions_dirty[MI_REGION_MAX]; - // Allocated regions static volatile _Atomic(uintptr_t) regions_count; // = 0; @@ -112,8 +109,7 @@ Utility functions // Blocks (of 4MiB) needed for the given size. 
static size_t mi_region_block_count(size_t size) { - mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); - return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE; + return _mi_divide_up(size, MI_SEGMENT_SIZE); } // Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. @@ -134,8 +130,11 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { } -static size_t mi_memid_create(mi_bitmap_index_t bitmap_idx) { - return bitmap_idx<<1; +static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { + mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); + size_t idx = region - regions; + mi_assert_internal(&regions[idx] == region); + return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1; } static size_t mi_memid_create_from_arena(size_t arena_memid) { @@ -146,177 +145,149 @@ static bool mi_memid_is_arena(size_t id) { return ((id&1)==1); } -static bool mi_memid_indices(size_t id, mi_bitmap_index_t* bitmap_idx, size_t* arena_memid) { +static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { if (mi_memid_is_arena(id)) { *arena_memid = (id>>1); return true; } else { - *bitmap_idx = (mi_bitmap_index_t)(id>>1); + size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS; + *bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS; + *region = &regions[idx]; return false; } } /* ---------------------------------------------------------------------------- - Ensure a region is allocated from the OS (or an arena) + Allocate a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ -static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_info_t* pinfo, mi_os_tld_t* tld) +static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { - // ensure the region is reserved - mi_region_info_t info = 
mi_atomic_read(&regions[idx].info); - if (mi_unlikely(info == 0)) - { - bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); - bool region_large = allow_large; - bool is_zero = false; - size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld); - mi_assert_internal(!(region_large && !allow_large)); + // not out of regions yet? + if (mi_atomic_read_relaxed(&regions_count) >= MI_REGION_MAX - 1) return false; - if (start == NULL) { - // failure to allocate from the OS! fail - *pinfo = 0; - return false; - } - - // set the newly allocated region - // try to initialize any region up to 4 beyond the current one in - // care multiple threads are doing this concurrently (common at startup) - info = mi_region_info_create(start, region_large, region_commit); - bool claimed = false; - for (size_t i = 0; i <= 4 && idx + i < MI_REGION_MAX && !claimed; i++) { - if (!is_zero) { - // set dirty bits before CAS; this might race with a zero block but that is ok. - // (but writing before cas prevents a concurrent allocation to assume it is not dirty) - mi_atomic_write(&regions_dirty[idx+i], MI_BITMAP_FIELD_FULL); - } - if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) { - // claimed! 
- regions[idx+i].arena_memid = arena_memid; - mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); - mi_atomic_increment(&regions_count); - claimed = true; - } - } - if (!claimed) { - // free our OS allocation if we didn't succeed to store it in some region - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); - } - // continue with the actual info at our index in case another thread was quicker with the allocation - info = mi_atomic_read(&regions[idx].info); - mi_assert_internal(info != 0); + // try to allocate a fresh region from the OS + bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); + bool region_large = (commit && allow_large); + bool is_zero = false; + size_t arena_memid = 0; + void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld); + if (start == NULL) return false; + mi_assert_internal(!(region_large && !allow_large)); + + // claim a fresh slot + const uintptr_t idx = mi_atomic_increment(&regions_count); + if (idx >= MI_REGION_MAX) { + mi_atomic_decrement(&regions_count); + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); + return false; } - mi_assert_internal(info == mi_atomic_read(&regions[idx].info)); - mi_assert_internal(info != 0); - *pinfo = info; + + // allocated, initialize and claim the initial blocks + mem_region_t* r = &regions[idx]; + r->numa_node = _mi_os_numa_node(tld) + 1; + r->arena_memid = arena_memid; + *bit_idx = 0; + mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); + mi_atomic_write(&r->info, mi_region_info_create(start, region_large, region_commit)); // now make it available to others + *region = r; + return true; +} + +/* ---------------------------------------------------------------------------- + Try to claim blocks in suitable regions +-----------------------------------------------------------------------------*/ + +static bool mi_region_is_suitable(const 
mem_region_t* region, int 
numa_node, bool commit, bool allow_large ) { + // initialized at all? + mi_region_info_t info = mi_atomic_read_relaxed(&region->info); + if (info==0) return false; + + // numa correct + if (numa_node >= 0) { // use negative numa node to always succeed + int rnode = ((int)mi_atomic_read_relaxed(&region->numa_node)) - 1; + if (rnode >= 0 && rnode != numa_node) return false; + } + + // note: we also skip if commit is false and the region is committed, + // that is a bit strong but prevents allocation of eager-delayed segments in an eagerly committed region + bool is_large; + bool is_committed; + mi_region_info_read(info, &is_large, &is_committed); + + if (!commit && is_committed) return false; + if (!allow_large && is_large) return false; return true; } -/* ---------------------------------------------------------------------------- - Commit blocks ------------------------------------------------------------------------------*/ - -static void* mi_region_commit_blocks(mi_bitmap_index_t bitmap_idx, mi_region_info_t info, size_t blocks, size_t size, bool* commit, bool* is_large, bool* is_zero, mi_os_tld_t* tld) +static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { - // set dirty bits - *is_zero = mi_bitmap_claim(regions_dirty, MI_REGION_MAX, blocks, bitmap_idx); + // try all regions for a free slot + const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); + const size_t count = mi_atomic_read(&regions_count); + size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? 
+ for (size_t visited = 0; visited < count; visited++, idx++) { + if (idx >= count) idx = 0; // wrap around + mem_region_t* r = &regions[idx]; + if (mi_region_is_suitable(r, numa_node, commit, allow_large)) { + if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) { + tld->region_idx = idx; // remember the last found position + *region = r; + return true; + } + } + } + return false; +} - // Commit the blocks to memory + +static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); + mem_region_t* region; + mi_bitmap_index_t bit_idx; + // first try to claim in existing regions + if (!mi_region_try_claim(blocks, *commit, *is_large, &region, &bit_idx, tld)) { + // otherwise try to allocate a fresh region + if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) { + // out of regions or memory + return NULL; + } + } + + // found a region and claimed `blocks` at `bit_idx` + mi_assert_internal(region != NULL); + mi_assert_internal(mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx)); + + mi_region_info_t info = mi_atomic_read(&region->info); bool region_is_committed = false; bool region_is_large = false; void* start = mi_region_info_read(info, &region_is_large, &region_is_committed); mi_assert_internal(!(region_is_large && !*is_large)); - mi_assert_internal(start!=NULL); + mi_assert_internal(start != NULL); - void* blocks_start = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bitmap_idx) * MI_SEGMENT_SIZE); - if (*commit && !region_is_committed) { - // ensure commit - bool commit_zero = false; - _mi_os_commit(blocks_start, mi_good_commit_size(size), &commit_zero, tld->stats); // only commit needed size (unless using large OS pages) - if (commit_zero) *is_zero = true; - } - else if (!*commit && region_is_committed) { - // but even when no commit is requested, we might have committed anyway (in a huge OS page for 
example) - *commit = 
true; - } - - // and return the allocation - mi_assert_internal(blocks_start != NULL); + bool any_zero = false; + *is_zero = mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, &any_zero); + if (!mi_option_is_enabled(mi_option_eager_commit)) any_zero = true; // if no eager commit, even dirty segments may be partially committed *is_large = region_is_large; - return blocks_start; + *memid = mi_memid_create(region, bit_idx); + void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); + if (*commit && !region_is_committed && any_zero) { // want to commit, but not yet fully committed? + // ensure commit + _mi_os_commit(p, blocks * MI_SEGMENT_SIZE, is_zero, tld->stats); + } + else { + *commit = region_is_committed || !any_zero; + } + + + // and return the allocation + mi_assert_internal(p != NULL); + return p; } -/* ---------------------------------------------------------------------------- - Claim and allocate blocks in a region ------------------------------------------------------------------------------*/ - -static bool mi_region_alloc_blocks( - size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, - void** p, size_t* id, mi_os_tld_t* tld) -{ - mi_bitmap_index_t bitmap_idx; - if (!mi_bitmap_try_claim_field(regions_map, idx, blocks, &bitmap_idx)) { - return true; // no error, but also no success - } - mi_region_info_t info; - if (!mi_region_ensure_allocated(idx,*allow_large,&info,tld)) { - // failed to allocate region memory, unclaim the bits and fail - mi_bitmap_unclaim(regions_map, MI_REGION_MAX, blocks, bitmap_idx); - return false; - } - *p = mi_region_commit_blocks(bitmap_idx,info,blocks,size,commit,allow_large,is_zero,tld); - *id = mi_memid_create(bitmap_idx); - return true; -} - - -/* ---------------------------------------------------------------------------- - Try to allocate blocks in suitable regions 
------------------------------------------------------------------------------*/ - -static bool 
mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool allow_large ) { - uintptr_t m = mi_atomic_read_relaxed(&regions_map[idx]); - if (m == MI_BITMAP_FIELD_FULL) return false; - if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = ((int)mi_atomic_read_relaxed(&regions[idx].numa_node)) - 1; - if (rnode >= 0 && rnode != numa_node) return false; - } - if (commit && allow_large) return true; // always ok - - // otherwise skip incompatible regions if possible. - // this is not guaranteed due to multiple threads allocating at the same time but - // that's ok. In secure mode, large is never allowed for any thread, so that works out; - // otherwise we might just not be able to reset/decommit individual pages sometimes. - mi_region_info_t info = mi_atomic_read_relaxed(&regions[idx].info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info, &is_large, &is_committed); - // note: we also skip if commit is false and the region is committed, - // that is a bit strong but prevents allocation of eager delayed segments in - // committed memory - bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation? - return ok; -} - -// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_try_alloc_blocks( - int numa_node, size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, - void** p, size_t* id, mi_os_tld_t* tld) -{ - // check if there are available blocks in the region.. 
- mi_assert_internal(idx < MI_REGION_MAX); - if (mi_region_is_suitable(numa_node, idx, *commit, *allow_large)) { - return mi_region_alloc_blocks(idx, blocks, size, commit, allow_large, is_zero, p, id, tld); - } - return true; // no error, but no success either -} /* ---------------------------------------------------------------------------- Allocation @@ -324,63 +295,35 @@ static bool mi_region_try_alloc_blocks( // Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. // (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, - size_t* id, mi_os_tld_t* tld) +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(id != NULL && tld != NULL); + mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); - *id = 0; + *memid = 0; *is_zero = false; bool default_large = false; if (large==NULL) large = &default_large; // ensure `large != NULL` - - // use direct OS allocation for huge blocks or alignment - if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) { - size_t arena_memid = 0; - void* p = _mi_arena_alloc_aligned(mi_good_commit_size(size), alignment, commit, large, is_zero, &arena_memid, tld); // round up size - *id = mi_memid_create_from_arena(arena_memid); - return p; - } - - // always round size to OS page size multiple (so commit/decommit go over the entire range) - // TODO: use large OS page size here? 
+ if (size == 0) return NULL; size = _mi_align_up(size, _mi_os_page_size()); - // calculate the number of needed blocks + // allocate from regions if possible + size_t arena_memid; const size_t blocks = mi_region_block_count(size); - mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); - - // find a range of free blocks - const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); - void* p = NULL; - const size_t count = mi_atomic_read(&regions_count); - size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? - for (size_t visited = 0; visited < count; visited++, idx++) { - if (idx >= count) idx = 0; // wrap around - if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error - if (p != NULL) break; - } - - if (p == NULL) { - // no free range in existing regions -- try to extend beyond the count.. but at most 8 regions - for (idx = count; idx < mi_atomic_read_relaxed(&regions_count) + 8 && idx < MI_REGION_MAX; idx++) { - if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error - if (p != NULL) break; + if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) { + void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld); + mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0); + if (p != NULL) { + if (*commit) { ((uint8_t*)p)[0] = 0; } + return p; } + _mi_warning_message("unable to allocate from region: size %zu\n", size); } - if (p == NULL) { - // we could not find a place to allocate, fall back to the os directly - _mi_warning_message("unable to allocate from region: size %zu\n", size); - size_t arena_memid = 0; - p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld); - *id = mi_memid_create_from_arena(arena_memid); - } - else { - tld->region_idx = idx; // next start of search - } - + // and otherwise fall back to the OS + 
void* p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld); + *memid = mi_memid_create_from_arena(arena_memid); mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); + if (p != NULL && *commit) { ((uint8_t*)p)[0] = 0; } return p; } @@ -396,31 +339,28 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { if (p==NULL) return; if (size==0) return; size_t arena_memid = 0; - mi_bitmap_index_t bitmap_idx; - if (mi_memid_indices(id,&bitmap_idx,&arena_memid)) { + mi_bitmap_index_t bit_idx; + mem_region_t* region; + if (mi_memid_indices(id,&region,&bit_idx,&arena_memid)) { // was a direct arena allocation, pass through _mi_arena_free(p, size, arena_memid, stats); } else { // allocated in a region - mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return; + mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; // we can align the size up to page size (as we allocate that way too) // this ensures we fully commit/decommit/reset size = _mi_align_up(size, _mi_os_page_size()); - const size_t blocks = mi_region_block_count(size); - const size_t idx = mi_bitmap_index_field(bitmap_idx); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`? - mem_region_t* region = &regions[idx]; + const size_t blocks = mi_region_block_count(size); mi_region_info_t info = mi_atomic_read(&region->info); bool is_large; bool is_eager_committed; void* start = mi_region_info_read(info,&is_large,&is_eager_committed); mi_assert_internal(start != NULL); - void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); + void* blocks_start = (uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE); mi_assert_internal(blocks_start == p); // not a pointer in our area? 
- mi_assert_internal(bitidx + blocks <= MI_BITMAP_FIELD_BITS); - if (blocks_start != p || bitidx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? + mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); + if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? // decommit (or reset) the blocks to reduce the working set. // TODO: implement delayed decommit/reset as these calls are too expensive @@ -446,7 +386,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // this frees up virtual address space which might be useful on 32-bit systems? // and unclaim - mi_bitmap_unclaim(regions_map, MI_REGION_MAX, blocks, bitmap_idx); + mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); } } @@ -456,13 +396,15 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { -----------------------------------------------------------------------------*/ void _mi_mem_collect(mi_stats_t* stats) { // free every region that has no segments in use. 
- for (size_t i = 0; i < regions_count; i++) { - if (mi_atomic_read_relaxed(&regions_map[i]) == 0) { + uintptr_t rcount = mi_atomic_read_relaxed(&regions_count); + for (size_t i = 0; i < rcount; i++) { + mem_region_t* region = &regions[i]; + if (mi_atomic_read_relaxed(&region->info) != 0) { // if no segments used, try to claim the whole region uintptr_t m; do { - m = mi_atomic_read_relaxed(&regions_map[i]); - } while(m == 0 && !mi_atomic_cas_weak(&regions_map[i], MI_BITMAP_FIELD_FULL, 0 )); + m = mi_atomic_read_relaxed(&region->in_use); + } while(m == 0 && !mi_atomic_cas_weak(&region->in_use, MI_BITMAP_FIELD_FULL, 0 )); if (m == 0) { // on success, free the whole region bool is_eager_committed; @@ -471,9 +413,7 @@ void _mi_mem_collect(mi_stats_t* stats) { _mi_arena_free(start, MI_REGION_SIZE, regions[i].arena_memid, stats); } // and release - mi_atomic_write(&regions[i].info,0); - mi_atomic_write(&regions_dirty[i],0); - mi_atomic_write(&regions_map[i],0); + mi_atomic_write(&region->info,0); } } } diff --git a/src/segment.c b/src/segment.c index 178e0eda..b2b37fac 100644 --- a/src/segment.c +++ b/src/segment.c @@ -370,7 +370,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } segment->memid = memid; segment->mem_is_fixed = mem_large; - segment->mem_is_committed = commit; + segment->mem_is_committed = commit; mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); From d2279b2a3faf7c2e084644449326306ef8d4f619 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 10 Nov 2019 08:13:40 -0800 Subject: [PATCH 071/293] update test-stress with better object distribution --- test/test-stress.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index be2a9c67..37572d42 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -17,8 +17,8 @@ terms of the MIT license. 
#include // argument defaults -static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 40; // scaling factor +static int THREADS = 8; // more repeatable if THREADS <= #processors +static int N = 200; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor @@ -63,7 +63,11 @@ static bool chance(size_t perc, random_t r) { } static void* alloc_items(size_t items, random_t r) { - if (chance(1, r)) items *= 100; // 1% huge objects; + if (chance(1, r)) { + if (chance(1, r)) items *= 1000; // 0.01% giant + else if (chance(10, r)) items *= 100; // 0.1% huge + else items *= 10; // 1% large objects; + } if (items==40) items++; // pthreads uses that size for stack increases uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); if (p != NULL) { From 21bbb1be870c8b9bd6ca057257a4cbb0ec57e6e5 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 10 Nov 2019 12:36:55 -0800 Subject: [PATCH 072/293] fix warnings --- src/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/init.c b/src/init.c index 7e704e7a..d5ec03c2 100644 --- a/src/init.c +++ b/src/init.c @@ -100,8 +100,8 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { 0, tld_main_stats, {{0,NULL,0}} }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { From 83a066fd2d0d7484abf6372e41ac777c721c761a Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 09:46:02 -0800 Subject: [PATCH 073/293] remove reset_decommits option --- include/mimalloc.h | 3 +-- src/memory.c | 28 ++++++++++++---------------- src/options.c | 7 +++---- src/os.c | 20 +++----------------- 4 files changed, 19 insertions(+), 39 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 70b6e412..4c542ee0 100644 --- 
a/include/mimalloc.h +++ b/include/mimalloc.h @@ -272,9 +272,8 @@ typedef enum mi_option_e { mi_option_segment_cache, mi_option_page_reset, mi_option_cache_reset, - mi_option_reset_decommits, - mi_option_eager_commit_delay, mi_option_segment_reset, + mi_option_eager_commit_delay, mi_option_os_tag, mi_option_max_numa_node, mi_option_max_errors, diff --git a/src/memory.c b/src/memory.c index a1f94e18..ceb9a702 100644 --- a/src/memory.c +++ b/src/memory.c @@ -350,12 +350,12 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; // we can align the size up to page size (as we allocate that way too) // this ensures we fully commit/decommit/reset - size = _mi_align_up(size, _mi_os_page_size()); - const size_t blocks = mi_region_block_count(size); + size = _mi_align_up(size, _mi_os_page_size()); + const size_t blocks = mi_region_block_count(size); mi_region_info_t info = mi_atomic_read(&region->info); bool is_large; - bool is_eager_committed; - void* start = mi_region_info_read(info,&is_large,&is_eager_committed); + bool is_committed; + void* start = mi_region_info_read(info, &is_large, &is_committed); mi_assert_internal(start != NULL); void* blocks_start = (uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE); mi_assert_internal(blocks_start == p); // not a pointer in our area? @@ -366,18 +366,14 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // TODO: implement delayed decommit/reset as these calls are too expensive // if the memory is reused soon.
// reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large - if (!is_large) { - if (mi_option_is_enabled(mi_option_segment_reset)) { - if (!is_eager_committed && // cannot reset large pages - (mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead - mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments - { - _mi_os_reset(p, size, stats); - //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? - } - } - } - if (!is_eager_committed) { + if (!is_large && + mi_option_is_enabled(mi_option_segment_reset) && + mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use `option_page_reset` instead + { + _mi_os_reset(p, size, stats); + //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? + } + if (!is_committed) { // adjust commit statistics as we commit again when re-using the same slot _mi_stat_decrease(&stats->committed, mi_good_commit_size(size)); } diff --git a/src/options.c b/src/options.c index 63b1612a..75a2736a 100644 --- a/src/options.c +++ b/src/options.c @@ -65,11 +65,10 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, + { 1, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(cache_reset) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on - { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly 
committed { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output @@ -88,7 +87,7 @@ void _mi_options_init(void) { mi_option_desc_t* desc = &options[option]; _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } - } + } mi_max_error_count = mi_option_get(mi_option_max_errors); } diff --git a/src/os.c b/src/os.c index 027df6ab..5229381b 100644 --- a/src/os.c +++ b/src/os.c @@ -646,10 +646,6 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) { return mi_os_commitx(addr, size, false, true /* conservative? */, &is_zero, stats); } -bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, true /* conservative? */, is_zero, stats); -} - // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory @@ -708,22 +704,12 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_decommit(addr,size,stats); - } - else { - return mi_os_resetx(addr, size, true, stats); - } + return mi_os_resetx(addr, size, true, stats); } bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) 
- } - else { - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); - } + *is_zero = false; + return mi_os_resetx(addr, size, false, stats); } From 93a646338343984b86b00b1c7852322eafa7190e Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 14:16:45 -0800 Subject: [PATCH 074/293] only allow commit delay for small and medium objects --- src/options.c | 2 +- src/segment.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/options.c b/src/options.c index 75a2736a..dbb7df79 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, + { 0, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(cache_reset) }, { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed diff --git a/src/segment.c b/src/segment.c index b2b37fac..d089078c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -328,9 +328,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); - bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager || (page_kind > MI_PAGE_MEDIUM); + bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); + bool commit = eager || (page_kind >= MI_PAGE_LARGE); bool protection_still_good = false; bool is_zero = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); @@ -359,7 +359,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, else { // Allocate the segment from the OS size_t memid; - bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy + bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { From 534e1e39ef29946e502fd0f668d2dc80ffd141da Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 14:42:29 -0800 Subject: [PATCH 075/293] allow allocation in committed regions even if not requested --- src/memory.c | 6 ++---- src/options.c | 4 ++-- src/segment.c | 4 +++- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/memory.c b/src/memory.c index ceb9a702..24239e05 100644 --- a/src/memory.c +++ b/src/memory.c @@ -210,14 +210,12 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo if (rnode >= 0 && rnode != numa_node) return false; } - // note: we also skip if commit is false and the region is committed, - // that is a bit strong but prevents allocation of 
eager-delayed segments in an eagerly committed region + // check allow-large bool is_large; bool is_committed; mi_region_info_read(info, &is_large, &is_committed); - - if (!commit && is_committed) return false; if (!allow_large && is_large) return false; + return true; } diff --git a/src/options.c b/src/options.c index dbb7df79..694b916b 100644 --- a/src/options.c +++ b/src/options.c @@ -65,8 +65,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, - { 0, UNINIT, MI_OPTION(cache_reset) }, + { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 0, UNINIT, MI_OPTION(cache_reset) }, // reset segment cache on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment.c b/src/segment.c index d089078c..eb5a0390 100644 --- a/src/segment.c +++ b/src/segment.c @@ -327,12 +327,14 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, mi_assert_internal(segment_size >= required); size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); - // Try to get it from our thread local cache first + // Initialize parameters bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (page_kind >= MI_PAGE_LARGE); bool protection_still_good = false; bool is_zero = false; + + // Try to get it from our thread local cache first mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { if (MI_SECURE!=0) { From 2bb058bd25258c2e7a9fb2c1a64400ec780c2912 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 14:44:32 -0800 Subject: [PATCH 076/293] remove cache_reset parameter --- include/mimalloc.h | 1 - src/options.c | 1 - src/segment.c | 6 +----- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 4c542ee0..6df889a4 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -271,7 +271,6 @@ typedef enum mi_option_e { mi_option_reserve_huge_os_pages, mi_option_segment_cache, mi_option_page_reset, - mi_option_cache_reset, mi_option_segment_reset, mi_option_eager_commit_delay, mi_option_os_tag, diff --git a/src/options.c b/src/options.c index 694b916b..1231e1c9 100644 --- a/src/options.c +++ b/src/options.c @@ -66,7 +66,6 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free - { 0, UNINIT, MI_OPTION(cache_reset) }, // reset segment cache on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose 
diff --git a/src/segment.c b/src/segment.c index eb5a0390..ef24c660 100644 --- a/src/segment.c +++ b/src/segment.c @@ -280,9 +280,6 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) return false; } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); - if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) { - _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); - } segment->next = tld->cache; tld->cache = segment; tld->cache_count++; @@ -351,8 +348,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->stats); segment->mem_is_committed = true; } - if (!segment->mem_is_fixed && - (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) { + if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_page_reset)) { bool reset_zero = false; _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->stats); if (reset_zero) is_zero = true; From db3f1c4bfadcb7007357fd61d7dc24369ae8fe31 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 15:26:11 -0800 Subject: [PATCH 077/293] add commit info to arenas --- src/arena.c | 66 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/src/arena.c b/src/arena.c index 1b6cf4a4..02890bd6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -33,6 +33,7 @@ of 256MiB in practice. 
#include "bitmap.inc.c" // atomic bitmap + // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); void _mi_os_free(void* p, size_t size, mi_stats_t* stats); @@ -40,6 +41,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- @@ -56,13 +58,15 @@ int _mi_os_numa_node_count(void); typedef struct mi_arena_s { uint8_t* start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) - size_t field_count; // number of bitmap fields + size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? + bool is_committed; // is the memory committed bool is_large; // large OS page allocated volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? - mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks + mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed? 
+ mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) } mi_arena_t; @@ -104,7 +108,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* size_t idx = mi_atomic_read(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around - if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) { + if (mi_bitmap_try_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { mi_atomic_write(&arena->search_idx, idx); // start search from here next time return true; } @@ -118,31 +122,46 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* ----------------------------------------------------------- */ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, size_t* memid) + bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_bitmap_index_t bitmap_index; - if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) { - // claimed it! set the dirty bits (todo: no need for an atomic op here?) - *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); - *memid = mi_memid_create(arena_index, bitmap_index); - *commit = true; // TODO: support commit on demand? - *large = arena->is_large; - return (arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE)); + if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; + + // claimed it! set the dirty bits (todo: no need for an atomic op here?) 
+ void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); + *memid = mi_memid_create(arena_index, bitmap_index); + *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + *large = arena->is_large; + if (arena->is_committed) { + // always committed + *commit = true; } - return NULL; + else if (commit) { + // ensure commit now + bool any_zero; + mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_zero); + if (any_zero) { + bool commit_zero; + _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); + if (commit_zero) *is_zero = true; + } + } + else { + // no need to commit, but check if already fully committed + *commit = mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + } + return p; } void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; *is_zero = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - + // try to allocate in an arena if the alignment is small enough // and the object is not too large or too small. if (alignment <= MI_SEGMENT_ALIGN && @@ -160,7 +179,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? 
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -172,7 +191,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -182,9 +201,6 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; - if (*large) { - *large = mi_option_is_enabled(mi_option_large_os_pages); // try large OS pages only if enabled and allowed - } return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); } @@ -223,7 +239,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { return; } const size_t blocks = mi_block_count_of_size(size); - bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx); + bool ones = mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); if (!ones) { _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); return; @@ -283,15 +299,17 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; + arena->is_committed = true; arena->search_idx = 0; - arena->blocks_dirty = &arena->blocks_map[bcount]; + arena->blocks_dirty = &arena->blocks_inuse[bcount]; + arena->blocks_committed = NULL; // the bitmaps are already zero initialized due to os_alloc // just claim leftover blocks if needed size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - mi_bitmap_claim(arena->blocks_map, fields, post, postidx, NULL); + mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } mi_arena_add(arena); From 5e6754f3f7905485ca74546ab082f4c3bc5404fd Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 15:45:31 -0800 Subject: [PATCH 078/293] track commit status per block in a region --- src/memory.c | 49 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/src/memory.c b/src/memory.c index 208b9b7e..8299bbc2 100644 --- a/src/memory.c +++ b/src/memory.c @@ -59,7 +59,7 @@ static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_ // Constants #if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 40KiB for the region map +#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 48KiB for the region map #elif (MI_INTPTR_SIZE==4) #define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map #else @@ -94,8 +94,9 @@ static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, b typedef struct mem_region_s { volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) - mi_bitmap_field_t in_use; - mi_bitmap_field_t dirty; + mi_bitmap_field_t in_use; // bit per in-use block + mi_bitmap_field_t dirty; // track if 
non-zero per block + mi_bitmap_field_t commit; // track if committed per block (if `!info.is_committed)) size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; @@ -165,20 +166,20 @@ static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t Allocate a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ -static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) +static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // not out of regions yet? if (mi_atomic_read_relaxed(&regions_count) >= MI_REGION_MAX - 1) return false; // try to allocate a fresh region from the OS bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); - bool region_large = (commit && allow_large); - bool is_zero = false; + bool region_large = (commit && allow_large); + bool is_zero = false; size_t arena_memid = 0; void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld); if (start == NULL) return false; mi_assert_internal(!(region_large && !allow_large)); - + // claim a fresh slot const uintptr_t idx = mi_atomic_increment(&regions_count); if (idx >= MI_REGION_MAX) { @@ -191,8 +192,13 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t* r = &regions[idx]; r->numa_node = _mi_os_numa_node(tld) + 1; r->arena_memid = arena_memid; + mi_atomic_write(&r->in_use, 0); + mi_atomic_write(&r->dirty, (is_zero ? 0 : ~0UL)); + mi_atomic_write(&r->commit, (region_commit ?
~0UL : 0)); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); + + // and share it mi_atomic_write(&r->info, mi_region_info_create(start, region_large, region_commit)); // now make it available to others *region = r; return true; @@ -269,20 +275,28 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_assert_internal(!(region_is_large && !*is_large)); mi_assert_internal(start != NULL); - bool any_zero = false; - *is_zero = mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, &any_zero); - if (!mi_option_is_enabled(mi_option_eager_commit)) any_zero = true; // if no eager commit, even dirty segments may be partially committed + *is_zero = mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, NULL); *is_large = region_is_large; *memid = mi_memid_create(region, bit_idx); void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - if (*commit && !region_is_committed && any_zero) { // want to commit, but not yet fully committed?
- // ensure commit - _mi_os_commit(p, blocks * MI_SEGMENT_SIZE, is_zero, tld->stats); + if (region_is_committed) { + // always committed + *commit = true; + } + else if (*commit) { + // ensure commit + bool any_zero; + mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, &any_zero); + if (any_zero) { + bool commit_zero; + _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); + if (commit_zero) *is_zero = true; + } } else { - *commit = region_is_committed || !any_zero; - } - + // no need to commit, but check if already fully committed + *commit = mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx); + } // and return the allocation mi_assert_internal(p != NULL); @@ -374,7 +388,8 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { mi_option_is_enabled(mi_option_segment_reset) && mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use `option_page_reset` instead { - _mi_os_reset(p, size, tld->stats); + // note: don't use `_mi_mem_reset` as it is shared with other threads!
+ _mi_os_reset(p, size, tld->stats); // TODO: maintain reset bits to unreset } if (!is_committed) { // adjust commit statistics as we commit again when re-using the same slot From a0958b2da696a308f8c200f45f08bf1ab3e5f14b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 17:06:16 -0800 Subject: [PATCH 079/293] enable more reset delay slots --- include/mimalloc-types.h | 13 ++++++-- src/init.c | 9 ++++-- src/memory.c | 70 ++++++++++++++++++++++++++-------------- src/options.c | 2 +- src/segment.c | 4 ++- 5 files changed, 66 insertions(+), 32 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 2651fc85..0ce91339 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -390,13 +390,20 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); // ------------------------------------------------------ typedef int64_t mi_msecs_t; +#define MI_RESET_DELAY_SLOTS (256) + typedef struct mi_delay_slot_s { mi_msecs_t expire; uint8_t* addr; size_t size; } mi_delay_slot_t; -#define MI_RESET_DELAY_SLOTS (128) +typedef struct mi_delay_slots_s { + size_t capacity; // always `MI_RESET_DELAY_SLOTS` + size_t count; // current slots used (`<= capacity`) + mi_delay_slot_t slots[MI_RESET_DELAY_SLOTS]; +} mi_delay_slots_t; + // ------------------------------------------------------ // Thread Local data @@ -411,8 +418,8 @@ typedef struct mi_segment_queue_s { // OS thread local data typedef struct mi_os_tld_s { size_t region_idx; // start point for next allocation - mi_stats_t* stats; // points to tld stats - mi_delay_slot_t reset_delay[MI_RESET_DELAY_SLOTS]; + mi_delay_slots_t* reset_delay; // delay slots for OS reset operations + mi_stats_t* stats; // points to tld stats } mi_os_tld_t; // Segments thread local data diff --git a/src/init.c b/src/init.c index d5ec03c2..c9700cd5 100644 --- a/src/init.c +++ b/src/init.c @@ -100,8 +100,8 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL 
,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, tld_main_stats, {{0,NULL,0}} }, // os - { MI_STATS_NULL } // stats + { 0, NULL, tld_main_stats }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { @@ -192,6 +192,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; + mi_delay_slots_t reset_delay; } mi_thread_data_t; // Initialize the thread local default heap, called from `mi_thread_init` @@ -211,6 +212,7 @@ static bool _mi_heap_init(void) { } mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; + mi_delay_slots_t* reset_delay = &td->reset_delay; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); heap->random = _mi_random_init(heap->thread_id); @@ -221,6 +223,9 @@ static bool _mi_heap_init(void) { tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; tld->os.stats = &tld->stats; + tld->os.reset_delay = reset_delay; + memset(reset_delay, 0, sizeof(*reset_delay)); + reset_delay->capacity = MI_RESET_DELAY_SLOTS; _mi_heap_default = heap; } return false; diff --git a/src/memory.c b/src/memory.c index 8299bbc2..f3052d6b 100644 --- a/src/memory.c +++ b/src/memory.c @@ -54,7 +54,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, s void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); // local -static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size); +static bool mi_delay_remove(mi_delay_slots_t* delay_slots, void* p, size_t size); // Constants @@ -208,7 +208,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, Try to claim blocks in suitable regions -----------------------------------------------------------------------------*/ -static bool mi_region_is_suitable(const mem_region_t* 
region, int numa_node, bool commit, bool allow_large ) { +static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { // initialized at all? mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); if (info==0) return false; @@ -229,7 +229,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo } -static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) +static bool mi_region_try_claim(size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // try all regions for a free slot const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); @@ -238,7 +238,7 @@ static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, me for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around mem_region_t* r = ®ions[idx]; - if (mi_region_is_suitable(r, numa_node, commit, allow_large)) { + if (mi_region_is_suitable(r, numa_node, allow_large)) { if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) { tld->region_idx = idx; // remember the last found position *region = r; @@ -256,7 +256,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mem_region_t* region; mi_bitmap_index_t bit_idx; // first try to claim in existing regions - if (!mi_region_try_claim(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { + if (!mi_region_try_claim(blocks, *is_large, ®ion, &bit_idx, tld)) { // otherwise try to allocate a fresh region if (!mi_region_try_alloc_os(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { // out of regions or memory @@ -354,7 +354,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { if (p==NULL) return; if (size==0) return; - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + mi_delay_remove(tld->reset_delay, 
p, size); size_t arena_memid = 0; mi_bitmap_index_t bit_idx; @@ -424,7 +424,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE); + mi_delay_remove(tld->reset_delay, start, MI_REGION_SIZE); _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); } // and release @@ -440,21 +440,22 @@ void _mi_mem_collect(mi_os_tld_t* tld) { typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); -static void mi_delay_insert(mi_delay_slot_t* slots, size_t count, +static void mi_delay_insert(mi_delay_slots_t* ds, mi_msecs_t delay, uint8_t* addr, size_t size, mi_delay_resolve_fun* resolve, void* arg) { - if (delay==0) { + if (ds == NULL || delay==0 || addr==NULL || size==0) { resolve(addr, size, arg); return; } mi_msecs_t now = _mi_clock_now(); - mi_delay_slot_t* oldest = slots; + mi_delay_slot_t* oldest = &ds->slots[0]; // walk through all slots, resolving expired ones. // remember the oldest slot to insert the new entry in. 
- for (size_t i = 0; i < count; i++) { - mi_delay_slot_t* slot = &slots[i]; + size_t newcount = 0; + for (size_t i = 0; i < ds->count; i++) { + mi_delay_slot_t* slot = &ds->slots[i]; if (slot->expire == 0) { // empty slot @@ -480,26 +481,40 @@ static void mi_delay_insert(mi_delay_slot_t* slots, size_t count, } else if (oldest->expire > slot->expire) { oldest = slot; + newcount = i+1; + } + else { + newcount = i+1; } } + ds->count = newcount; if (delay>0) { - // not yet registered, use the oldest slot - if (oldest->expire > 0) { + // not yet registered, use the oldest slot (or a new one if there is space) + if (ds->count < ds->capacity) { + oldest = &ds->slots[ds->count]; + ds->count++; + } + else if (oldest->expire > 0) { resolve(oldest->addr, oldest->size, arg); // evict if not empty } + mi_assert_internal((oldest - ds->slots) < (ptrdiff_t)ds->count); oldest->expire = now + delay; oldest->addr = addr; oldest->size = size; } } -static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size) +static bool mi_delay_remove(mi_delay_slots_t* ds, void* p, size_t size) { + if (ds == NULL || p==NULL || size==0) return false; + uint8_t* addr = (uint8_t*)p; bool done = false; - // walk through all slots - for (size_t i = 0; i < count; i++) { - mi_delay_slot_t* slot = &slots[i]; + size_t newcount = 0; + + // walk through all valid slots + for (size_t i = 0; i < ds->count; i++) { + mi_delay_slot_t* slot = &ds->slots[i]; if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { // earlier slot encompasses the area; remove it slot->expire = 0; @@ -510,12 +525,17 @@ static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_ slot->expire = 0; } else if ((addr <= slot->addr && addr + size > slot->addr) || - (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { - // partial overlap, remove slot - mi_assert_internal(false); + (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { + 
// partial overlap + // can happen with a large object spanning onto some partial end block + // mi_assert_internal(false); slot->expire = 0; } + else { + newcount = i + 1; + } } + ds->count = newcount; return done; } @@ -525,13 +545,13 @@ static void mi_resolve_reset(void* p, size_t size, void* vtld) { } bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_insert(tld->reset_delay, MI_RESET_DELAY_SLOTS, mi_option_get(mi_option_reset_delay), + mi_delay_insert(tld->reset_delay, mi_option_get(mi_option_reset_delay), (uint8_t*)p, size, &mi_resolve_reset, tld); return true; } bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (!mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, (uint8_t*)p, size)) { + if (!mi_delay_remove(tld->reset_delay, (uint8_t*)p, size)) { return _mi_os_unreset(p, size, is_zero, tld->stats); } return true; @@ -544,12 +564,12 @@ bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { -----------------------------------------------------------------------------*/ bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + mi_delay_remove(tld->reset_delay,p, size); return _mi_os_commit(p, size, is_zero, tld->stats); } bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + mi_delay_remove(tld->reset_delay, p, size); return _mi_os_decommit(p, size, tld->stats); } diff --git a/src/options.c b/src/options.c index 81ffe88b..ff96c95b 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, 
MI_OPTION(page_reset) }, // reset pages on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds diff --git a/src/segment.c b/src/segment.c index 49dab6ba..549dd339 100644 --- a/src/segment.c +++ b/src/segment.c @@ -504,7 +504,9 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg _mi_stat_decrease(&tld->stats->pages, 1); // reset the page memory to reduce memory pressure? - if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { + if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) + // && segment->page_kind <= MI_PAGE_MEDIUM) // to prevent partial overlapping resets + { size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; From 165ee4584597aebdb1a45fcd4e8b3904b6f7d396 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 17:31:48 -0800 Subject: [PATCH 080/293] initialize delay slots for the main thread --- src/init.c | 4 +++- src/options.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/init.c b/src/init.c index c9700cd5..5967b4b9 100644 --- a/src/init.c +++ b/src/init.c @@ -96,11 +96,13 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) #define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) +static mi_delay_slots_t tld_reset_delay_main = { MI_RESET_DELAY_SLOTS, 0, { {0,NULL,0} } }; + static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, NULL, tld_main_stats }, 
// os + { 0, &tld_reset_delay_main, tld_main_stats }, // os { MI_STATS_NULL } // stats }; diff --git a/src/options.c b/src/options.c index ff96c95b..81ffe88b 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds From ef179a63770d8e17f105303a08ddfdd57085b936 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 Nov 2019 10:16:59 -0800 Subject: [PATCH 081/293] avoid allocation at numa node detection on linux --- include/mimalloc-internal.h | 37 +++++++++++++++------ src/os.c | 65 +++++++++++++++++-------------------- 2 files changed, 56 insertions(+), 46 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 6bfabe27..668a7bd3 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -17,18 +17,18 @@ terms of the MIT license. A copy of the license can be found in the file #if (MI_DEBUG>0) #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) #else -#define mi_trace_message(...) +#define mi_trace_message(...) 
#endif #if defined(_MSC_VER) #define mi_decl_noinline __declspec(noinline) -#define mi_attr_noreturn +#define mi_attr_noreturn #elif defined(__GNUC__) || defined(__clang__) #define mi_decl_noinline __attribute__((noinline)) #define mi_attr_noreturn __attribute__((noreturn)) #else #define mi_decl_noinline -#define mi_attr_noreturn +#define mi_attr_noreturn #endif @@ -56,8 +56,6 @@ void _mi_os_init(void); // called fro void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); -int _mi_os_numa_node(mi_os_tld_t* tld); -int _mi_os_numa_node_count(void); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); @@ -146,8 +144,8 @@ bool _mi_page_is_valid(mi_page_t* page); Inlined definitions ----------------------------------------------------------- */ #define UNUSED(x) (void)(x) -#if (MI_DEBUG>0) -#define UNUSED_RELEASE(x) +#if (MI_DEBUG>0) +#define UNUSED_RELEASE(x) #else #define UNUSED_RELEASE(x) UNUSED(x) #endif @@ -398,7 +396,7 @@ static inline mi_block_t* mi_block_nextx( uintptr_t cookie, const mi_block_t* bl #endif } -static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) { +static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) { #ifdef MI_ENCODE_FREELIST block->next = (mi_encoded_t)next ^ cookie; #else @@ -411,12 +409,12 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* #ifdef MI_ENCODE_FREELIST mi_block_t* next = mi_block_nextx(page->cookie,block); // check for free list corruption: is `next` at least in our segment range? 
- // TODO: it is better to check if it is actually inside our page but that is more expensive + // TODO: it is better to check if it is actually inside our page but that is more expensive // to calculate. Perhaps with a relative free list this becomes feasible? if (next!=NULL && !mi_is_in_same_segment(block, next)) { _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); next = NULL; - } + } return next; #else UNUSED(page); @@ -433,6 +431,25 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c #endif } + +// ------------------------------------------------------------------- +// Optimize numa node access for the common case (= one node) +// ------------------------------------------------------------------- + +int _mi_os_numa_node_get(mi_os_tld_t* tld); +int _mi_os_numa_node_count_get(void); + +extern int _mi_numa_node_count; +static inline int _mi_os_numa_node(mi_os_tld_t* tld) { + if (mi_likely(_mi_numa_node_count == 1)) return 0; + else return _mi_os_numa_node_get(tld); +} +static inline int _mi_os_numa_node_count(void) { + if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count; + else return _mi_os_numa_node_count_get(); +} + + // ------------------------------------------------------------------- // Getting the thread id should be performant // as it is called in the fast path of `_mi_free`, diff --git a/src/os.c b/src/os.c index 5229381b..d6878927 100644 --- a/src/os.c +++ b/src/os.c @@ -786,9 +786,9 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; mi_win_enable_large_os_pages(); - + #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; + MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = 
true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { @@ -818,7 +818,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation if (pVirtualAlloc2 != NULL && numa_node >= 0) { params[0].Type = MemExtendedParameterNumaNode; - params[0].ULong = (unsigned)numa_node; + params[0].ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } #endif @@ -838,7 +838,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); - // TODO: does `mbind` work correctly for huge OS pages? should we + // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? // see: long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); @@ -857,7 +857,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } #endif -#if (MI_INTPTR_SIZE >= 8) +#if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge OS pages static _Atomic(uintptr_t) mi_huge_start; // = 0 @@ -900,7 +900,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse size_t size = 0; uint8_t* start = mi_os_claim_huge_pages(pages, &size); if (start == NULL) return NULL; // or 32-bit systems - + // Allocate one page at the time but try to place them contiguously // We allocate one page at the time to be able to abort if it takes too long // or to at least allocate as many as available on the system. 
@@ -920,11 +920,11 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } break; } - + // success, record it _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); - + // check for timeout if (max_msecs > 0) { mi_msecs_t elapsed = _mi_clock_end(start_t); @@ -958,7 +958,7 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { } /* ---------------------------------------------------------------------------- -Support NUMA aware allocation +Support NUMA aware allocation -----------------------------------------------------------------------------*/ #ifdef WIN32 static int mi_os_numa_nodex() { @@ -975,9 +975,8 @@ static int mi_os_numa_node_countx(void) { return (int)(numa_max + 1); } #elif defined(__linux__) -#include -#include -#include +#include // getcpu +#include // access static int mi_os_numa_nodex(void) { #ifdef SYS_getcpu @@ -990,22 +989,15 @@ static int mi_os_numa_nodex(void) { return 0; #endif } - static int mi_os_numa_node_countx(void) { - DIR* d = opendir("/sys/devices/system/node"); - if (d==NULL) return 1; - - struct dirent* de; - int max_node_num = 0; - while ((de = readdir(d)) != NULL) { - int node_num; - if (strncmp(de->d_name, "node", 4) == 0) { - node_num = (int)strtol(de->d_name+4, NULL, 0); - if (max_node_num < node_num) max_node_num = node_num; - } + char buf[128]; + int max_node = mi_option_get(mi_option_max_numa_node); + int node = 0; + for(node = 0; node < max_node; node++) { + snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1); + if (access(buf,R_OK) != 0) break; } - closedir(d); - return (max_node_num + 1); + return (node+1); } #else static int mi_os_numa_nodex(void) { @@ -1016,29 +1008,30 @@ static int mi_os_numa_node_countx(void) { } #endif -int _mi_os_numa_node_count(void) { - static int numa_node_count = 0; // cache the node count - if (mi_unlikely(numa_node_count <= 0)) { - int ncount = 
mi_os_numa_node_countx(); +int _mi_numa_node_count = 0; // cache the node count + +int _mi_os_numa_node_count_get(void) { + if (mi_unlikely(_mi_numa_node_count <= 0)) { + int ncount = mi_os_numa_node_countx(); int ncount0 = ncount; // never more than max numa node and at least 1 int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); if (ncount > nmax) ncount = nmax; if (ncount <= 0) ncount = 1; - numa_node_count = ncount; - _mi_verbose_message("using %i numa regions (%i nodes detected)\n", numa_node_count, ncount0); + _mi_numa_node_count = ncount; + _mi_verbose_message("using %i numa regions (%i nodes detected)\n", _mi_numa_node_count, ncount0); } - mi_assert_internal(numa_node_count >= 1); - return numa_node_count; + mi_assert_internal(_mi_numa_node_count >= 1); + return _mi_numa_node_count; } -int _mi_os_numa_node(mi_os_tld_t* tld) { +int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); int numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 int numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - if (numa_node < 0) numa_node = 0; + if (numa_node < 0) numa_node = 0; return numa_node; } From af746ca4c1682e29dd42e8c0e6fa6db6aa04b200 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 Nov 2019 10:17:39 -0800 Subject: [PATCH 082/293] inline bitmap_mask --- src/bitmap.inc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 3847e712..81f87a79 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -8,11 +8,11 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- This file is meant to be included in other files for efficiency. 
It implements a bitmap that can set/reset sequences of bits atomically -and is used to concurrently claim memory ranges. +and is used to concurrently claim memory ranges. A bitmap is an array of fields where each field is a machine word (`uintptr_t`) -A current limitation is that the bit sequences cannot cross fields +A current limitation is that the bit sequences cannot cross fields and that the sequence must be smaller or equal to the bits in a field. ---------------------------------------------------------------------------- */ #pragma once @@ -59,7 +59,7 @@ static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { // The bit mask for a given number of blocks at a specified bit index. -static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { +static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); if (count == MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; return ((((uintptr_t)1 << count) - 1) << bitidx); @@ -104,10 +104,10 @@ static inline size_t mi_bsr(uintptr_t x) { Claim a bit sequence atomically ----------------------------------------------------------- */ -// Try to atomically claim a sequence of `count` bits in a single +// Try to atomically claim a sequence of `count` bits in a single // field at `idx` in `bitmap`. Returns `true` on success. -static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) -{ +static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +{ mi_assert_internal(bitmap_idx != NULL); volatile _Atomic(uintptr_t)* field = &bitmap[idx]; uintptr_t map = mi_atomic_read(field); @@ -136,7 +136,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con continue; } else { - // success, we claimed the bits! + // success, we claimed the bits! 
*bitmap_idx = mi_bitmap_index_create(idx, bitidx); return true; } @@ -205,4 +205,4 @@ static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields return ((mi_atomic_read(&bitmap[idx]) & mask) == mask); } -#endif \ No newline at end of file +#endif From 867d78f877474c7f36fd19bc2ea62918f117f068 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 Nov 2019 10:19:52 -0800 Subject: [PATCH 083/293] reserve huge OS pages earlier on at process_init --- src/init.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/init.c b/src/init.c index 5967b4b9..473e9a32 100644 --- a/src/init.c +++ b/src/init.c @@ -19,7 +19,7 @@ const mi_page_t _mi_page_empty = { 0, #endif 0, // used - NULL, + NULL, ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), 0, NULL, NULL, NULL #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) @@ -246,7 +246,7 @@ static bool _mi_heap_done(void) { // switch to backing heap and free it heap = heap->tld->heap_backing; if (!mi_heap_is_initialized(heap)) return false; - + // collect if not the main thread if (heap != &_mi_heap_main) { _mi_heap_collect_abandon(heap); @@ -394,7 +394,7 @@ bool mi_is_redirected() mi_attr_noexcept { } // Communicate with the redirection module on Windows -#if defined(_WIN32) && defined(MI_SHARED_LIB) +#if defined(_WIN32) && defined(MI_SHARED_LIB) #ifdef __cplusplus extern "C" { #endif @@ -440,11 +440,6 @@ static void mi_process_load(void) { if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { _mi_fputs(NULL,NULL,msg); } - - if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - mi_reserve_huge_os_pages_interleave(pages, pages*500); - } } // Initialize the process; called by thread_init or the process loader @@ -471,6 +466,11 @@ void mi_process_init(void) mi_attr_noexcept { #endif mi_thread_init(); 
mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) + + if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); + mi_reserve_huge_os_pages_interleave(pages, pages*500); + } } // Called when the process is done (through `at_exit`) @@ -497,7 +497,7 @@ static void mi_process_done(void) { #if defined(_WIN32) && defined(MI_SHARED_LIB) - // Windows DLL: easy to hook into process_init and thread_done + // Windows DLL: easy to hook into process_init and thread_done __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { UNUSED(reserved); UNUSED(inst); From d4f54dcf3049bd958ee262cbd9b3b0c7134d59ed Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 Nov 2019 10:37:15 -0800 Subject: [PATCH 084/293] remove numaif dependency on linux --- CMakeLists.txt | 11 ----------- src/os.c | 21 ++++++++++++++------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 18bdea5a..a2258128 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,6 @@ option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanis option(MI_BUILD_TESTS "Build test executables" ON) include("cmake/mimalloc-config-version.cmake") -include("CheckIncludeFile") set(mi_install_dir "lib/mimalloc-${mi_version}") @@ -98,16 +97,6 @@ if(MI_USE_CXX MATCHES "ON") set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) endif() -CHECK_INCLUDE_FILE("numaif.h" MI_HAVE_NUMA_H) -if(MI_HAVE_NUMA_H) - list(APPEND mi_defines MI_HAS_NUMA) - list(APPEND mi_libraries numa) -else() - if (NOT(WIN32)) - message(WARNING "Compiling without using NUMA optimized allocation (on Linux, install libnuma-dev?)") - endif() -endif() - # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas) diff --git a/src/os.c b/src/os.c index 
d6878927..7af7363b 100644 --- a/src/os.c +++ b/src/os.c @@ -827,28 +827,35 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) -#ifdef MI_HAS_NUMA -#include // mbind, and use -lnuma +#include +#ifndef MPOL_PREFERRED +#define MPOL_PREFERRED 1 +#endif +#if defined(SYS_mbind) +static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); +} +#else +static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + UNUSED(start); UNUSED(len); UNUSED(mode); UNUSED(nmask); UNUSED(maxnode); UNUSED(flags); + return 0; +} #endif static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); bool is_large = true; void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? 
// see: - long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } } - #else - UNUSED(numa_node); - #endif return p; } #else From bdb82748191ac5dbc436f0f62dcbebfd3df95157 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 12 Nov 2019 12:04:43 -0800 Subject: [PATCH 085/293] change max_numa_node to max_numa_nodes option --- include/mimalloc.h | 2 +- src/options.c | 2 +- src/os.c | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 67b17c73..8d029135 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -275,7 +275,7 @@ typedef enum mi_option_e { mi_option_eager_commit_delay, mi_option_reset_delay, mi_option_os_tag, - mi_option_max_numa_node, + mi_option_max_numa_nodes, mi_option_max_errors, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 81ffe88b..bbea4e67 100644 --- a/src/options.c +++ b/src/options.c @@ -70,7 +70,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node + { 256, UNINIT, MI_OPTION(max_numa_nodes) }, // use at most N numa nodes { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; diff --git a/src/os.c b/src/os.c index 7af7363b..93fb8b31 100644 --- a/src/os.c +++ b/src/os.c @@ -998,9 +998,10 @@ static int mi_os_numa_nodex(void) { } static int mi_os_numa_node_countx(void) { char buf[128]; - int max_node = mi_option_get(mi_option_max_numa_node); 
+ int max_nodes = mi_option_get(mi_option_max_numa_nodes); // set to 0 to disable detection (and NUMA awareness) int node = 0; - for(node = 0; node < max_node; node++) { + for(node = 0; node < max_nodes; node++) { + // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1); if (access(buf,R_OK) != 0) break; } @@ -1022,7 +1023,7 @@ int _mi_os_numa_node_count_get(void) { int ncount = mi_os_numa_node_countx(); int ncount0 = ncount; // never more than max numa node and at least 1 - int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); + int nmax = (int)mi_option_get(mi_option_max_numa_nodes); if (ncount > nmax) ncount = nmax; if (ncount <= 0) ncount = 1; _mi_numa_node_count = ncount; From 29919a938dbd6f070ed84b146ad4d712946240ee Mon Sep 17 00:00:00 2001 From: Marco Wang Date: Wed, 13 Nov 2019 13:19:21 +0800 Subject: [PATCH 086/293] Avoid the use of variable argument list function --- include/mimalloc-atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index dff0f011..10368df3 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -220,7 +220,7 @@ static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) #endif #elif defined(__wasi__) #include - static inline void mi_atomic_yield() { + static inline void mi_atomic_yield(void) { sched_yield(); } #else From d01ed42bcb755ed6c1b52bfd8a306821da098dd5 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 13 Nov 2019 13:35:50 -0800 Subject: [PATCH 087/293] replace max_numa_nodes by use_numa_nodes (to help with wrong detection of numa nodes on WSL for example) --- include/mimalloc-internal.h | 8 +++--- include/mimalloc.h | 4 +-- src/arena.c | 15 +++++------ src/init.c | 2 +- src/options.c | 4 +-- src/os.c | 54 +++++++++++++++++-------------------- 6 files changed, 40 insertions(+), 47 
deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 668a7bd3..77045a99 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -436,15 +436,15 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // Optimize numa node access for the common case (= one node) // ------------------------------------------------------------------- -int _mi_os_numa_node_get(mi_os_tld_t* tld); -int _mi_os_numa_node_count_get(void); +int _mi_os_numa_node_get(mi_os_tld_t* tld); +size_t _mi_os_numa_node_count_get(void); -extern int _mi_numa_node_count; +extern size_t _mi_numa_node_count; static inline int _mi_os_numa_node(mi_os_tld_t* tld) { if (mi_likely(_mi_numa_node_count == 1)) return 0; else return _mi_os_numa_node_get(tld); } -static inline int _mi_os_numa_node_count(void) { +static inline size_t _mi_os_numa_node_count(void) { if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count; else return _mi_os_numa_node_count_get(); } diff --git a/include/mimalloc.h b/include/mimalloc.h index 8d029135..3c942849 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -230,7 +230,7 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; mi_decl_export bool mi_is_redirected() mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; // deprecated @@ -274,8 +274,8 @@ typedef enum mi_option_e { mi_option_segment_reset, mi_option_eager_commit_delay, mi_option_reset_delay, + mi_option_use_numa_nodes, mi_option_os_tag, - mi_option_max_numa_nodes, mi_option_max_errors, _mi_option_last } 
mi_option_t; diff --git a/src/arena.c b/src/arena.c index 02890bd6..46741208 100644 --- a/src/arena.c +++ b/src/arena.c @@ -42,7 +42,6 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_sec void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- Arena allocation @@ -317,22 +316,22 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec } -// reserve huge pages evenly among all numa nodes. -int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept { +// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected) +int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept { if (pages == 0) return 0; // pages per numa node - int numa_count = _mi_os_numa_node_count(); + size_t numa_count = (numa_nodes > 0 ? 
numa_nodes : _mi_os_numa_node_count()); if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs / numa_count) + 50; // reserve evenly among numa nodes - for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if ((size_t)numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); + if (numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; @@ -349,7 +348,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages, (size_t)(max_secs * 1000.0)); + int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } diff --git a/src/init.c b/src/init.c index 473e9a32..72543b95 100644 --- a/src/init.c +++ b/src/init.c @@ -469,7 +469,7 @@ void mi_process_init(void) mi_attr_noexcept { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - mi_reserve_huge_os_pages_interleave(pages, pages*500); + mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); } } diff --git a/src/options.c b/src/options.c index bbea4e67..180f6a75 100644 --- a/src/options.c +++ b/src/options.c @@ -69,9 +69,9 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager 
commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 256, UNINIT, MI_OPTION(max_numa_nodes) }, // use at most N numa nodes - { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index 93fb8b31..2415a40d 100644 --- a/src/os.c +++ b/src/os.c @@ -968,66 +968,61 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { Support NUMA aware allocation -----------------------------------------------------------------------------*/ #ifdef WIN32 -static int mi_os_numa_nodex() { +static size_t mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); GetNumaProcessorNodeEx(&pnum,&numa_node); - return (int)numa_node; + return numa_node; } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { ULONG numa_max = 0; GetNumaHighestNodeNumber(&numa_max); - return (int)(numa_max + 1); + return (numa_max + 1); } #elif defined(__linux__) #include // getcpu #include // access -static int mi_os_numa_nodex(void) { +static size_t mi_os_numa_nodex(void) { #ifdef SYS_getcpu - unsigned node = 0; - unsigned ncpu = 0; - int err = syscall(SYS_getcpu, &ncpu, &node, NULL); + unsigned long node = 0; + unsigned long ncpu = 0; + long err = syscall(SYS_getcpu, &ncpu, &node, NULL); if (err != 0) return 0; - return (int)node; + return node; #else return 0; #endif } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { char buf[128]; - int 
max_nodes = mi_option_get(mi_option_max_numa_nodes); // set to 0 to disable detection (and NUMA awareness) - int node = 0; - for(node = 0; node < max_nodes; node++) { + unsigned node = 0; + for(node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) - snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1); + snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); if (access(buf,R_OK) != 0) break; } return (node+1); } #else -static int mi_os_numa_nodex(void) { +static size_t mi_os_numa_nodex(void) { return 0; } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { return 1; } #endif -int _mi_numa_node_count = 0; // cache the node count +size_t _mi_numa_node_count = 0; // cache the node count -int _mi_os_numa_node_count_get(void) { +size_t _mi_os_numa_node_count_get(void) { if (mi_unlikely(_mi_numa_node_count <= 0)) { - int ncount = mi_os_numa_node_countx(); - int ncount0 = ncount; - // never more than max numa node and at least 1 - int nmax = (int)mi_option_get(mi_option_max_numa_nodes); - if (ncount > nmax) ncount = nmax; - if (ncount <= 0) ncount = 1; - _mi_numa_node_count = ncount; - _mi_verbose_message("using %i numa regions (%i nodes detected)\n", _mi_numa_node_count, ncount0); + long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? + if (ncount <= 0) ncount = (long)mi_os_numa_node_countx(); // or detect dynamically + _mi_numa_node_count = (size_t)(ncount <= 0 ? 
1 : ncount); + _mi_verbose_message("using %zd numa regions\n", _mi_numa_node_count); } mi_assert_internal(_mi_numa_node_count >= 1); return _mi_numa_node_count; @@ -1035,11 +1030,10 @@ int _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); - int numa_count = _mi_os_numa_node_count(); + size_t numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - int numa_node = mi_os_numa_nodex(); + size_t numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - if (numa_node < 0) numa_node = 0; - return numa_node; + return (int)numa_node; } From a4ed63d1273befbe2c8835395f3137564d3af7e9 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 13 Nov 2019 17:22:03 -0800 Subject: [PATCH 088/293] Adresses pr #165 and issue #164 by @colesbury: On Mac OS, the thread-local _mi_default_heap may get reset before _mi_thread_done is called, leaking the default heap on non-main threads. Now the current default heap is also stored in mi_pthread_key (or mi_fls_key on Windows). The _mi_thread_done function is called with this value. 
--- include/mimalloc-internal.h | 1 + src/heap.c | 6 ++-- src/init.c | 59 +++++++++++++++++++++---------------- 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index ccf12a06..73849337 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -101,6 +101,7 @@ uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD i void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); uintptr_t _mi_heap_random(mi_heap_t* heap); +void _mi_heap_set_default_direct(mi_heap_t* heap); // "stats.c" void _mi_stats_done(mi_stats_t* stats); diff --git a/src/heap.c b/src/heap.c index 15c5d02a..daa9b241 100644 --- a/src/heap.c +++ b/src/heap.c @@ -223,7 +223,7 @@ static void mi_heap_free(mi_heap_t* heap) { // reset default if (mi_heap_is_default(heap)) { - _mi_heap_default = heap->tld->heap_backing; + _mi_heap_set_default_direct(heap->tld->heap_backing); } // and free the used memory mi_free(heap); @@ -354,8 +354,8 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { mi_assert(mi_heap_is_initialized(heap)); if (!mi_heap_is_initialized(heap)) return NULL; mi_assert_expensive(mi_heap_is_valid(heap)); - mi_heap_t* old = _mi_heap_default; - _mi_heap_default = heap; + mi_heap_t* old = mi_get_default_heap(); + _mi_heap_set_default_direct(heap); return old; } diff --git a/src/init.c b/src/init.c index e15d82eb..081e7ce7 100644 --- a/src/init.c +++ b/src/init.c @@ -90,6 +90,7 @@ const mi_heap_t _mi_heap_empty = { false }; +// the thread-local default heap for allocation mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; @@ -198,8 +199,8 @@ static bool _mi_heap_init(void) { if (mi_heap_is_initialized(_mi_heap_default)) return true; if (_mi_is_main_thread()) { // the main heap is statically allocated - _mi_heap_default = &_mi_heap_main; - mi_assert_internal(_mi_heap_default->tld->heap_backing == _mi_heap_default); + 
_mi_heap_set_default_direct(&_mi_heap_main); + mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); } else { // use `_mi_os_alloc` to allocate directly from the OS @@ -219,18 +220,17 @@ static bool _mi_heap_init(void) { tld->heap_backing = heap; tld->segments.stats = &tld->stats; tld->os.stats = &tld->stats; - _mi_heap_default = heap; + _mi_heap_set_default_direct(heap); } return false; } // Free the thread local default heap (called from `mi_thread_done`) -static bool _mi_heap_done(void) { - mi_heap_t* heap = _mi_heap_default; +static bool _mi_heap_done(mi_heap_t* heap) { if (!mi_heap_is_initialized(heap)) return true; // reset default heap - _mi_heap_default = (_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty); + _mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty); // todo: delete all non-backing heaps? @@ -277,6 +277,8 @@ static bool _mi_heap_done(void) { // to set up the thread local keys. // -------------------------------------------------------- +static void _mi_thread_done(mi_heap_t* default_heap); + #ifdef __wasi__ // no pthreads in the WebAssembly Standard Interface #elif !defined(_WIN32) @@ -291,14 +293,14 @@ static bool _mi_heap_done(void) { #include static DWORD mi_fls_key; static void NTAPI mi_fls_done(PVOID value) { - if (value!=NULL) mi_thread_done(); + if (value!=NULL) _mi_thread_done((mi_heap_t*)value); } #elif defined(MI_USE_PTHREADS) // use pthread locol storage keys to detect thread ending #include static pthread_key_t mi_pthread_key; static void mi_pthread_done(void* value) { - if (value!=NULL) mi_thread_done(); + if (value!=NULL) _mi_thread_done((mi_heap_t*)value); } #elif defined(__wasi__) // no pthreads in the WebAssembly Standard Interface @@ -332,6 +334,8 @@ void mi_thread_init(void) mi_attr_noexcept mi_process_init(); // initialize the thread local default heap + // (this will call `_mi_heap_set_default_direct` and thus set the + // fiber/pthread 
key to a non-zero value, ensuring `_mi_thread_done` is called) if (_mi_heap_init()) return; // returns true if already initialized // don't further initialize for the main thread @@ -339,33 +343,38 @@ void mi_thread_init(void) mi_attr_noexcept _mi_stat_increase(&mi_get_default_heap()->tld->stats.threads, 1); - // set hooks so our mi_thread_done() will be called - #if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain - #elif defined(_WIN32) && !defined(MI_SHARED_LIB) - FlsSetValue(mi_fls_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_fls_done` is called - #elif defined(MI_USE_PTHREADS) - pthread_setspecific(mi_pthread_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_pthread_done` is called - #endif - //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id()); } void mi_thread_done(void) mi_attr_noexcept { + _mi_thread_done(mi_get_default_heap()); +} + +static void _mi_thread_done(mi_heap_t* heap) { // stats - mi_heap_t* heap = mi_get_default_heap(); if (!_mi_is_main_thread() && mi_heap_is_initialized(heap)) { _mi_stat_decrease(&heap->tld->stats.threads, 1); } - // abandon the thread local heap - if (_mi_heap_done()) return; // returns true if already ran - - //if (!_mi_is_main_thread()) { - // _mi_verbose_message("thread done: 0x%zx\n", _mi_thread_id()); - //} + if (_mi_heap_done(heap)) return; // returns true if already ran } +void _mi_heap_set_default_direct(mi_heap_t* heap) { + mi_assert_internal(heap != NULL); + _mi_heap_default = heap; + + // ensure the default heap is passed to `_mi_thread_done` + // setting to a non-NULL value also ensures `mi_thread_done` is called. 
+ #if defined(_WIN32) && defined(MI_SHARED_LIB) + // nothing to do as it is done in DllMain + #elif defined(_WIN32) && !defined(MI_SHARED_LIB) + FlsSetValue(mi_fls_key, heap); + #elif defined(MI_USE_PTHREADS) + pthread_setspecific(mi_pthread_key, heap); + #endif +} + + // -------------------------------------------------------- // Run functions on process init/done, and thread init/done @@ -446,7 +455,7 @@ void mi_process_init(void) mi_attr_noexcept { // access _mi_heap_default before setting _mi_process_is_initialized to ensure // that the TLS slot is allocated without getting into recursion on macOS // when using dynamic linking with interpose. - mi_heap_t* h = _mi_heap_default; + mi_heap_t* h = mi_get_default_heap(); _mi_process_is_initialized = true; _mi_heap_main.thread_id = _mi_thread_id(); From dccffea66286dfb16e642aef3fea7babee7038e3 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 14 Nov 2019 11:01:05 -0800 Subject: [PATCH 089/293] fix pr #173 by @zerodefect to use case-insensitive matching of the build type; also use MI_DEBUG_FULL option (instead of MI_CHECK_FULL) --- CMakeLists.txt | 74 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b455881..aa9c126f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,15 +6,14 @@ set(CMAKE_CXX_STANDARD 17) option(MI_OVERRIDE "Override the standard malloc interface" ON) option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" ON) -option(MI_SEE_ASM "Generate assembly files" OFF) -option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode" OFF) -option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF) -option(MI_SECURE "Use security mitigations (like guard pages and randomization)" OFF) +option(MI_DEBUG_FULL "Use full internal heap invariant checking in DEBUG mode" OFF) +option(MI_SECURE "Use security mitigations (like guard pages, allocation randomization, and 
free-list corruption detection)" OFF) option(MI_SECURE_FULL "Use full security mitigations, may be more expensive (includes double-free mitigation)" OFF) +option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF) +option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) - -set(mi_install_dir "${CMAKE_INSTALL_PREFIX}/lib/mimalloc-${mi_version}") +option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF) set(mi_sources src/stats.c @@ -29,29 +28,33 @@ set(mi_sources src/options.c src/init.c) -# Set default build type +# ----------------------------------------------------------------------------- +# Converience: set default build type depending on the build directory +# ----------------------------------------------------------------------------- + if (NOT CMAKE_BUILD_TYPE) - if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$") - message(STATUS "No build type selected, default to *** Debug ***") + if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$" OR MI_DEBUG_FULL MATCHES "ON") + message(STATUS "No build type selected, default to: Debug") set(CMAKE_BUILD_TYPE "Debug") else() - message(STATUS "No build type selected, default to *** Release ***") + message(STATUS "No build type selected, default to: Release") set(CMAKE_BUILD_TYPE "Release") endif() -else() - message(STATUS "Build type specified as *** ${CMAKE_BUILD_TYPE} ***") endif() if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$") + message(STATUS "Default to secure build") set(MI_SECURE "ON") endif() +# ----------------------------------------------------------------------------- +# Process options +# ----------------------------------------------------------------------------- + if(CMAKE_C_COMPILER_ID MATCHES "MSVC") set(MI_USE_CXX "ON") endif() - -# Options if(MI_OVERRIDE MATCHES "ON") message(STATUS "Override 
standard malloc (MI_OVERRIDE=ON)") if(APPLE) @@ -84,7 +87,12 @@ if(MI_SEE_ASM MATCHES "ON") endif() if(MI_CHECK_FULL MATCHES "ON") - message(STATUS "Set debug level to full invariant checking (MI_CHECK_FULL=ON)") + message(STATUS "The MI_CHECK_FULL option is deprecated, use MI_DEBUG_FULL instead") + set(MI_DEBUG_FULL "ON") +endif() + +if(MI_DEBUG_FULL MATCHES "ON") + message(STATUS "Set debug level to full invariant checking (MI_DEBUG_FULL=ON)") list(APPEND mi_defines MI_DEBUG=3) # full invariant checking endif() @@ -109,19 +117,6 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") endif() endif() -if(NOT(CMAKE_BUILD_TYPE MATCHES "Release|release|RelWithDebInfo|relwithdebinfo")) - string(TOLOWER "${CMAKE_BUILD_TYPE}" build_type) - set(mi_basename "mimalloc-${build_type}") -else() - if(MI_SECURE MATCHES "ON") - set(mi_basename "mimalloc-secure") - else() - set(mi_basename "mimalloc") - endif() -endif() -message(STATUS "Output library name : ${mi_basename}") -message(STATUS "Installation directory: ${mi_install_dir}") - # extra needed libraries if(WIN32) list(APPEND mi_libraries psapi shell32 user32) @@ -134,9 +129,28 @@ else() endif() # ----------------------------------------------------------------------------- -# Main targets +# Install and output names # ----------------------------------------------------------------------------- +set(mi_install_dir "${CMAKE_INSTALL_PREFIX}/lib/mimalloc-${mi_version}") +if(MI_SECURE MATCHES "ON") + set(mi_basename "mimalloc-secure") +else() + set(mi_basename "mimalloc") +endif() +string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC) +if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel)$")) + set(mi_basename "${mi_basename}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. 
-debug) if not a release version +endif() +message(STATUS "") +message(STATUS "Library base name: ${mi_basename}") +message(STATUS "Build type : ${CMAKE_BUILD_TYPE_LC}") +message(STATUS "Install directory: ${mi_install_dir}") +message(STATUS "") + +# ----------------------------------------------------------------------------- +# Main targets +# ----------------------------------------------------------------------------- # shared library add_library(mimalloc SHARED ${mi_sources}) @@ -238,7 +252,7 @@ endif() if (MI_OVERRIDE MATCHES "ON") target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE) if(NOT WIN32) - # It is only possible to override malloc on Windows when building as a DLL. (src/alloc-override.c) + # It is only possible to override malloc on Windows when building as a DLL. target_compile_definitions(mimalloc-static PRIVATE MI_MALLOC_OVERRIDE) target_compile_definitions(mimalloc-obj PRIVATE MI_MALLOC_OVERRIDE) endif() From 8637f113d5ed817fa93e584d716d2b5c91ca723f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 15 Nov 2019 14:09:17 -0800 Subject: [PATCH 090/293] improve test-stress to run multiple iterations --- test/test-stress.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index bb428072..4b6ec22d 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -18,7 +18,8 @@ terms of the MIT license. 
// argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 20; // scaling factor +static int N = 20; // scaling factor +static int ITER = 10; // N full iterations re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor @@ -159,14 +160,17 @@ int main(int argc, char** argv) { //bench_start_program(); mi_stats_reset(); - memset((void*)transfer, 0, TRANSFERS*sizeof(void*)); - run_os_threads(THREADS); - for (int i = 0; i < TRANSFERS; i++) { - free_items((void*)transfer[i]); + for (int i = 0; i < ITER; i++) { + memset((void*)transfer, 0, TRANSFERS * sizeof(void*)); + run_os_threads(THREADS); + for (int i = 0; i < TRANSFERS; i++) { + free_items((void*)transfer[i]); + } } - #ifndef NDEBUG +#ifndef NDEBUG mi_collect(false); - #endif +#endif + mi_stats_print(NULL); //bench_end_program(); return 0; From fd3ce5dc7d22bf4155588ac2755a98e4a405303f Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 15 Nov 2019 16:28:11 -0800 Subject: [PATCH 091/293] improve stress test --- ide/vs2019/mimalloc-test-stress.vcxproj | 4 +- test/test-stress.c | 86 ++++++++++++++++--------- 2 files changed, 59 insertions(+), 31 deletions(-) diff --git a/ide/vs2019/mimalloc-test-stress.vcxproj b/ide/vs2019/mimalloc-test-stress.vcxproj index afbb6666..ef7ab357 100644 --- a/ide/vs2019/mimalloc-test-stress.vcxproj +++ b/ide/vs2019/mimalloc-test-stress.vcxproj @@ -149,8 +149,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea7} + + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/test/test-stress.c b/test/test-stress.c index 4b6ec22d..b6ceaa0a 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -6,7 +6,8 @@ terms of the MIT license. /* This is a stress test for the allocator, using multiple threads and transferring objects between threads. This is not a typical workload - but uses a random linear size distribution. Do not use this test as a benchmark! 
+ but uses a random linear size distribution. Timing can also depend on + (random) thread scheduling. Do not use this test as a benchmark! */ #include @@ -18,16 +19,31 @@ terms of the MIT license. // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 20; // scaling factor -static int ITER = 10; // N full iterations re-creating all threads +static int SCALE = 12; // scaling factor +static int ITER = 50; // N full iterations re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors -// static int N = 100; // scaling factor +// static int SCALE = 100; // scaling factor +static bool allow_large_objects = true; // allow very large objects? +static size_t use_one_size = 0; // use single object size of N uintptr_t? + + +#ifdef USE_STD_MALLOC +#define custom_malloc(s) malloc(s) +#define custom_realloc(p,s) realloc(p,s) +#define custom_free(p) free(p) +#else +#define custom_malloc(s) mi_malloc(s) +#define custom_realloc(p,s) mi_realloc(p,s) +#define custom_free(p) mi_free(p) +#endif + +// transfer pointer between threads #define TRANSFERS (1000) - static volatile void* transfer[TRANSFERS]; + #if (UINTPTR_MAX != UINT32_MAX) const uintptr_t cookie = 0xbf58476d1ce4e5b9UL; #else @@ -64,10 +80,17 @@ static bool chance(size_t perc, random_t r) { } static void* alloc_items(size_t items, random_t r) { - if (chance(1, r)) items *= 100; // 1% huge objects; + if (chance(1, r)) { + if (chance(1, r) && allow_large_objects) items *= 1000; // 0.01% giant + else if (chance(10, r) && allow_large_objects) items *= 100; // 0.1% huge + else items *= 10; // 1% large objects; + } if (items==40) items++; // pthreads uses that size for stack increases - uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); - for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + if (use_one_size>0) items = (use_one_size/sizeof(uintptr_t)); + uintptr_t* p = 
(uintptr_t*)custom_malloc(items*sizeof(uintptr_t)); + if (p != NULL) { + for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + } return p; } @@ -82,7 +105,7 @@ static void free_items(void* p) { } } } - mi_free(p); + custom_free(p); } @@ -91,12 +114,12 @@ static void stress(intptr_t tid) { uintptr_t r = tid ^ 42; const size_t max_item = 128; // in words const size_t max_item_retained = 10*max_item; - size_t allocs = 25*N*(tid%8 + 1); // some threads do more + size_t allocs = 25*SCALE*(tid%8 + 1); // some threads do more size_t retain = allocs/2; void** data = NULL; size_t data_size = 0; size_t data_top = 0; - void** retained = (void**)mi_malloc(retain*sizeof(void*)); + void** retained = (void**)custom_malloc(retain*sizeof(void*)); size_t retain_top = 0; while (allocs>0 || retain>0) { @@ -105,7 +128,7 @@ static void stress(intptr_t tid) { allocs--; if (data_top >= data_size) { data_size += 100000; - data = (void**)mi_realloc(data, data_size*sizeof(void*)); + data = (void**)custom_realloc(data, data_size*sizeof(void*)); } data[data_top++] = alloc_items((pick(&r) % max_item) + 1, &r); } @@ -121,7 +144,7 @@ static void stress(intptr_t tid) { data[idx] = NULL; } if (chance(25, &r) && data_top > 0) { - // 25% transfer-swap + // 25% exchange a local pointer with the (shared) transfer buffer. 
size_t data_idx = pick(&r) % data_top; size_t transfer_idx = pick(&r) % TRANSFERS; void* p = data[data_idx]; @@ -136,8 +159,8 @@ static void stress(intptr_t tid) { for (size_t i = 0; i < data_top; i++) { free_items(data[i]); } - mi_free(retained); - mi_free(data); + custom_free(retained); + custom_free(data); //bench_end_thread(); } @@ -152,25 +175,29 @@ int main(int argc, char** argv) { if (argc>=3) { char* end; long n = (strtol(argv[2], &end, 10)); - if (n > 0) N = n; + if (n > 0) SCALE = n; } - printf("start with %i threads with a %i%% load-per-thread\n", THREADS, N); + printf("start with %i threads with a %i%% load-per-thread\n", THREADS, SCALE); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res); - //bench_start_program(); + //bench_start_program(); + + // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. mi_stats_reset(); - for (int i = 0; i < ITER; i++) { - memset((void*)transfer, 0, TRANSFERS * sizeof(void*)); + uintptr_t r = 43; + for (int n = 0; n < ITER; n++) { run_os_threads(THREADS); for (int i = 0; i < TRANSFERS; i++) { - free_items((void*)transfer[i]); + if (chance(50, &r) || n+1 == ITER) { // free all on last run, otherwise free half of the transfers + void* p = atomic_exchange_ptr(&transfer[i], NULL); + free_items(p); + } } } -#ifndef NDEBUG - mi_collect(false); -#endif + mi_collect(false); + mi_collect(true); mi_stats_print(NULL); //bench_end_program(); return 0; @@ -187,8 +214,8 @@ static DWORD WINAPI thread_entry(LPVOID param) { } static void run_os_threads(size_t nthreads) { - DWORD* tids = (DWORD*)malloc(nthreads * sizeof(DWORD)); - HANDLE* thandles = (HANDLE*)malloc(nthreads * sizeof(HANDLE)); + DWORD* tids = (DWORD*)custom_malloc(nthreads * sizeof(DWORD)); + HANDLE* thandles = (HANDLE*)custom_malloc(nthreads * sizeof(HANDLE)); for (uintptr_t i = 0; i < nthreads; i++) { thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]); } @@ -198,8 +225,8 
@@ static void run_os_threads(size_t nthreads) { for (size_t i = 0; i < nthreads; i++) { CloseHandle(thandles[i]); } - free(tids); - free(thandles); + custom_free(tids); + custom_free(thandles); } static void* atomic_exchange_ptr(volatile void** p, void* newval) { @@ -220,7 +247,7 @@ static void* thread_entry(void* param) { } static void run_os_threads(size_t nthreads) { - pthread_t* threads = (pthread_t*)mi_malloc(nthreads*sizeof(pthread_t)); + pthread_t* threads = (pthread_t*)custom_malloc(nthreads*sizeof(pthread_t)); memset(threads, 0, sizeof(pthread_t)*nthreads); //pthread_setconcurrency(nthreads); for (uintptr_t i = 0; i < nthreads; i++) { @@ -229,6 +256,7 @@ static void run_os_threads(size_t nthreads) { for (size_t i = 0; i < nthreads; i++) { pthread_join(threads[i], NULL); } + custom_free(threads); } static void* atomic_exchange_ptr(volatile void** p, void* newval) { From 94bfb4772575d43bb11247b957ee5c3741a97a1a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 12:59:26 -0800 Subject: [PATCH 092/293] update stress test for more realisitic size distribution --- test/test-stress.c | 87 ++++++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index b6ceaa0a..6b2fb8c4 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -7,7 +7,7 @@ terms of the MIT license. /* This is a stress test for the allocator, using multiple threads and transferring objects between threads. This is not a typical workload but uses a random linear size distribution. Timing can also depend on - (random) thread scheduling. Do not use this test as a benchmark! + (random) thread scheduling. Do not use this test as a benchmark! */ #include @@ -17,10 +17,12 @@ terms of the MIT license. 
#include #include +// > mimalloc-test-stress [THREADS] [SCALE] [ITER] +// // argument defaults -static int THREADS = 32; // more repeatable if THREADS <= #processors -static int SCALE = 12; // scaling factor -static int ITER = 50; // N full iterations re-creating all threads +static int THREADS = 32; // more repeatable if THREADS <= #processors +static int SCALE = 50; // scaling factor +static int ITER = 10; // N full iterations re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor @@ -56,21 +58,21 @@ typedef uintptr_t* random_t; static uintptr_t pick(random_t r) { uintptr_t x = *r; - #if (UINTPTR_MAX > UINT32_MAX) - // by Sebastiano Vigna, see: +#if (UINTPTR_MAX > UINT32_MAX) + // by Sebastiano Vigna, see: x ^= x >> 30; x *= 0xbf58476d1ce4e5b9UL; x ^= x >> 27; x *= 0x94d049bb133111ebUL; x ^= x >> 31; - #else - // by Chris Wellons, see: +#else + // by Chris Wellons, see: x ^= x >> 16; x *= 0x7feb352dUL; x ^= x >> 15; x *= 0x846ca68bUL; x ^= x >> 16; - #endif +#endif *r = x; return x; } @@ -81,13 +83,13 @@ static bool chance(size_t perc, random_t r) { static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) { - if (chance(1, r) && allow_large_objects) items *= 1000; // 0.01% giant - else if (chance(10, r) && allow_large_objects) items *= 100; // 0.1% huge - else items *= 10; // 1% large objects; + if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant + else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge + else items *= 100; // 1% large objects; } - if (items==40) items++; // pthreads uses that size for stack increases - if (use_one_size>0) items = (use_one_size/sizeof(uintptr_t)); - uintptr_t* p = (uintptr_t*)custom_malloc(items*sizeof(uintptr_t)); + if (items == 40) items++; // pthreads uses that size for stack increases + if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t)); + uintptr_t* p = 
(uintptr_t*)custom_malloc(items * sizeof(uintptr_t)); if (p != NULL) { for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; } @@ -99,7 +101,7 @@ static void free_items(void* p) { uintptr_t* q = (uintptr_t*)p; uintptr_t items = (q[0] ^ cookie); for (uintptr_t i = 0; i < items; i++) { - if ((q[i]^cookie) != items - i) { + if ((q[i] ^ cookie) != items - i) { fprintf(stderr, "memory corruption at block %p at %zu\n", p, i); abort(); } @@ -111,30 +113,30 @@ static void free_items(void* p) { static void stress(intptr_t tid) { //bench_start_thread(); - uintptr_t r = tid ^ 42; - const size_t max_item = 128; // in words - const size_t max_item_retained = 10*max_item; - size_t allocs = 25*SCALE*(tid%8 + 1); // some threads do more - size_t retain = allocs/2; + uintptr_t r = tid * 43; + const size_t max_item_shift = 5; // 128 + const size_t max_item_retained_shift = max_item_shift + 2; + size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more + size_t retain = allocs / 2; void** data = NULL; size_t data_size = 0; size_t data_top = 0; - void** retained = (void**)custom_malloc(retain*sizeof(void*)); + void** retained = (void**)custom_malloc(retain * sizeof(void*)); size_t retain_top = 0; - while (allocs>0 || retain>0) { + while (allocs > 0 || retain > 0) { if (retain == 0 || (chance(50, &r) && allocs > 0)) { // 50%+ alloc allocs--; if (data_top >= data_size) { data_size += 100000; - data = (void**)custom_realloc(data, data_size*sizeof(void*)); + data = (void**)custom_realloc(data, data_size * sizeof(void*)); } - data[data_top++] = alloc_items((pick(&r) % max_item) + 1, &r); + data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r); } else { // 25% retain - retained[retain_top++] = alloc_items(10*((pick(&r) % max_item_retained) + 1), &r); + retained[retain_top++] = alloc_items( 1ULL << (pick(&r) % max_item_retained_shift), &r); retain--; } if (chance(66, &r) && data_top > 0) { @@ -167,36 +169,45 @@ static void stress(intptr_t 
tid) { static void run_os_threads(size_t nthreads); int main(int argc, char** argv) { - if (argc>=2) { + // > mimalloc-test-stress [THREADS] [SCALE] [ITER] + if (argc >= 2) { char* end; long n = strtol(argv[1], &end, 10); if (n > 0) THREADS = n; } - if (argc>=3) { + if (argc >= 3) { char* end; long n = (strtol(argv[2], &end, 10)); if (n > 0) SCALE = n; } - printf("start with %i threads with a %i%% load-per-thread\n", THREADS, SCALE); + if (argc >= 4) { + char* end; + long n = (strtol(argv[3], &end, 10)); + if (n > 0) ITER = n; + } + printf("start with %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res); - //bench_start_program(); + //bench_start_program(); // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. mi_stats_reset(); - uintptr_t r = 43; + uintptr_t r = 43 * 43; for (int n = 0; n < ITER; n++) { run_os_threads(THREADS); for (int i = 0; i < TRANSFERS; i++) { - if (chance(50, &r) || n+1 == ITER) { // free all on last run, otherwise free half of the transfers + if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers void* p = atomic_exchange_ptr(&transfer[i], NULL); free_items(p); } } + mi_collect(false); +#ifndef NDEBUG + if ((n + 1) % 10 == 0) { printf("- iterations: %3d\n", n + 1); } +#endif } - mi_collect(false); mi_collect(true); mi_stats_print(NULL); //bench_end_program(); @@ -230,11 +241,11 @@ static void run_os_threads(size_t nthreads) { } static void* atomic_exchange_ptr(volatile void** p, void* newval) { - #if (INTPTR_MAX == UINT32_MAX) +#if (INTPTR_MAX == UINT32_MAX) return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval); - #else +#else return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval); - #endif +#endif } #else @@ -247,8 +258,8 @@ static void* thread_entry(void* param) { } static void run_os_threads(size_t 
nthreads) { - pthread_t* threads = (pthread_t*)custom_malloc(nthreads*sizeof(pthread_t)); - memset(threads, 0, sizeof(pthread_t)*nthreads); + pthread_t* threads = (pthread_t*)custom_malloc(nthreads * sizeof(pthread_t)); + memset(threads, 0, sizeof(pthread_t) * nthreads); //pthread_setconcurrency(nthreads); for (uintptr_t i = 0; i < nthreads; i++) { pthread_create(&threads[i], NULL, &thread_entry, (void*)i); From 4d4a2885f5ef5d0b3db8de149b472380f495e729 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 13:19:17 -0800 Subject: [PATCH 093/293] use atomic read/write on the page->heap field where concurrent interaction is possible --- src/alloc.c | 2 +- src/page-queue.c | 6 +++--- src/page.c | 21 ++++++++++++++------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index d2319f82..c4863115 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -235,7 +235,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc } else { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* heap = page->heap; + mi_heap_t* heap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. 
(do this atomically as the lock only protects heap memory validity) diff --git a/src/page-queue.c b/src/page-queue.c index 4af70b50..95443a69 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -260,7 +260,7 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { page->heap->page_count--; page->next = NULL; page->prev = NULL; - page->heap = NULL; + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); mi_page_set_in_full(page,false); } @@ -274,7 +274,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_ (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); - page->heap = heap; + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); page->next = queue->first; page->prev = NULL; if (queue->first != NULL) { @@ -338,7 +338,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue // set append pages to new heap and count size_t count = 0; for (mi_page_t* page = append->first; page != NULL; page = page->next) { - page->heap = heap; + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); count++; } diff --git a/src/page.c b/src/page.c index aaf1cb91..a8115d27 100644 --- a/src/page.c +++ b/src/page.c @@ -343,18 +343,24 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(page->heap != NULL); - _mi_page_use_delayed_free(page,MI_NEVER_DELAYED_FREE); +#if MI_DEBUG > 1 + mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); +#endif + + // remove from our page list + mi_segments_tld_t* segments_tld = &page->heap->tld->segments; + mi_page_queue_remove(pq, page); + + // page is no longer associated with our heap + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + #if MI_DEBUG>1 // check there are no references left.. 
- for (mi_block_t* block = (mi_block_t*)page->heap->thread_delayed_free; block != NULL; block = mi_block_nextx(page->heap->cookie,block)) { + for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap->cookie, block)) { mi_assert_internal(_mi_ptr_page(block) != page); } #endif - // and then remove from our page list - mi_segments_tld_t* segments_tld = &page->heap->tld->segments; - mi_page_queue_remove(pq, page); - // and abandon it mi_assert_internal(page->heap == NULL); _mi_segment_page_abandon(page,segments_tld); @@ -755,7 +761,8 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE); mi_assert_internal(_mi_page_segment(page)->used==1); mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue - page->heap = NULL; + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { _mi_stat_increase(&heap->tld->stats.giant, block_size); _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1); From 30e2c54adba9f1d2ef32e35e4e6c4b80e5732c26 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 14:13:02 -0800 Subject: [PATCH 094/293] remove delayed reset option (for now) --- include/mimalloc.h | 2 +- src/memory.c | 139 ++------------------------ src/options.c | 4 +- src/os.c | 237 +++++++++++++++++++++++++-------------------- 4 files changed, 142 insertions(+), 240 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 3c942849..a59b9cf7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -273,7 +273,7 @@ typedef enum mi_option_e { mi_option_page_reset, mi_option_segment_reset, mi_option_eager_commit_delay, - mi_option_reset_delay, + mi_option_reset_decommits, mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, diff --git a/src/memory.c b/src/memory.c index f3052d6b..b0bcf7a0 100644 --- 
a/src/memory.c +++ b/src/memory.c @@ -53,9 +53,6 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -// local -static bool mi_delay_remove(mi_delay_slots_t* delay_slots, void* p, size_t size); - // Constants #if (MI_INTPTR_SIZE==8) @@ -354,8 +351,6 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { if (p==NULL) return; if (size==0) return; - mi_delay_remove(tld->reset_delay, p, size); - size_t arena_memid = 0; mi_bitmap_index_t bit_idx; mem_region_t* region; @@ -424,7 +419,6 @@ void _mi_mem_collect(mi_os_tld_t* tld) { bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - mi_delay_remove(tld->reset_delay, start, MI_REGION_SIZE); _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); } // and release @@ -434,142 +428,23 @@ void _mi_mem_collect(mi_os_tld_t* tld) { } } -/* ---------------------------------------------------------------------------- - Delay slots ------------------------------------------------------------------------------*/ - -typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); - -static void mi_delay_insert(mi_delay_slots_t* ds, - mi_msecs_t delay, uint8_t* addr, size_t size, - mi_delay_resolve_fun* resolve, void* arg) -{ - if (ds == NULL || delay==0 || addr==NULL || size==0) { - resolve(addr, size, arg); - return; - } - - mi_msecs_t now = _mi_clock_now(); - mi_delay_slot_t* oldest = &ds->slots[0]; - // walk through all slots, resolving expired ones. - // remember the oldest slot to insert the new entry in. 
- size_t newcount = 0; - for (size_t i = 0; i < ds->count; i++) { - mi_delay_slot_t* slot = &ds->slots[i]; - - if (slot->expire == 0) { - // empty slot - oldest = slot; - } - // TODO: should we handle overlapping areas too? - else if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { - // earlier slot encompasses new area, increase expiration - slot->expire = now + delay; - delay = 0; - } - else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { - // new one encompasses old slot, overwrite - slot->expire = now + delay; - slot->addr = addr; - slot->size = size; - delay = 0; - } - else if (slot->expire < now) { - // expired slot, resolve now - slot->expire = 0; - resolve(slot->addr, slot->size, arg); - } - else if (oldest->expire > slot->expire) { - oldest = slot; - newcount = i+1; - } - else { - newcount = i+1; - } - } - ds->count = newcount; - if (delay>0) { - // not yet registered, use the oldest slot (or a new one if there is space) - if (ds->count < ds->capacity) { - oldest = &ds->slots[ds->count]; - ds->count++; - } - else if (oldest->expire > 0) { - resolve(oldest->addr, oldest->size, arg); // evict if not empty - } - mi_assert_internal((oldest - ds->slots) < (ptrdiff_t)ds->count); - oldest->expire = now + delay; - oldest->addr = addr; - oldest->size = size; - } -} - -static bool mi_delay_remove(mi_delay_slots_t* ds, void* p, size_t size) -{ - if (ds == NULL || p==NULL || size==0) return false; - - uint8_t* addr = (uint8_t*)p; - bool done = false; - size_t newcount = 0; - - // walk through all valid slots - for (size_t i = 0; i < ds->count; i++) { - mi_delay_slot_t* slot = &ds->slots[i]; - if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { - // earlier slot encompasses the area; remove it - slot->expire = 0; - done = true; - } - else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { - // new one encompasses old slot, remove it - slot->expire = 0; - } - else if ((addr <= slot->addr && addr + size 
> slot->addr) || - (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { - // partial overlap - // can happen with a large object spanning onto some partial end block - // mi_assert_internal(false); - slot->expire = 0; - } - else { - newcount = i + 1; - } - } - ds->count = newcount; - return done; -} - -static void mi_resolve_reset(void* p, size_t size, void* vtld) { - mi_os_tld_t* tld = (mi_os_tld_t*)vtld; - _mi_os_reset(p, size, tld->stats); -} - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_insert(tld->reset_delay, mi_option_get(mi_option_reset_delay), - (uint8_t*)p, size, &mi_resolve_reset, tld); - return true; -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (!mi_delay_remove(tld->reset_delay, (uint8_t*)p, size)) { - return _mi_os_unreset(p, size, is_zero, tld->stats); - } - return true; -} - - /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { + return _mi_os_reset(p, size, tld->stats); +} + +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + return _mi_os_unreset(p, size, is_zero, tld->stats); +} bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay,p, size); return _mi_os_commit(p, size, is_zero, tld->stats); } bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay, p, size); return _mi_os_decommit(p, size, tld->stats); } diff --git a/src/options.c b/src/options.c index 180f6a75..8c4c1707 100644 --- a/src/options.c +++ b/src/options.c @@ -65,10 +65,10 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, 
MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset uses decommit/commit { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/os.c b/src/os.c index 2415a40d..02683a02 100644 --- a/src/os.c +++ b/src/os.c @@ -77,11 +77,11 @@ static bool use_large_os_page(size_t size, size_t alignment) { // round to a good OS allocation size (bounded by max 12.5% waste) size_t _mi_os_good_alloc_size(size_t size) { size_t align_size; - if (size < 512*KiB) align_size = _mi_os_page_size(); - else if (size < 2*MiB) align_size = 64*KiB; - else if (size < 8*MiB) align_size = 256*KiB; - else if (size < 32*MiB) align_size = 1*MiB; - else align_size = 4*MiB; + if (size < 512 * KiB) align_size = _mi_os_page_size(); + else if (size < 2 * MiB) align_size = 64 * KiB; + else if (size < 8 * MiB) align_size = 256 * KiB; + else if (size < 32 * MiB) align_size = 1 * MiB; + else align_size = 4 * MiB; if (size >= (SIZE_MAX - align_size)) return size; // possible overflow? return _mi_align_up(size, align_size); } @@ -92,8 +92,8 @@ size_t _mi_os_good_alloc_size(size_t size) { // NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) // We hide MEM_EXTENDED_PARAMETER to compile with older SDK's. 
#include -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); -typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); +typedef PVOID(__stdcall* PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); +typedef NTSTATUS(__stdcall* PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; @@ -129,7 +129,7 @@ static bool mi_win_enable_large_os_pages() if (err == 0) err = GetLastError(); _mi_warning_message("cannot enable large OS page support, error %lu\n", err); } - return (ok!=0); + return (ok != 0); } void _mi_os_init(void) { @@ -144,7 +144,7 @@ void _mi_os_init(void) { if (hDll != NULL) { // use VirtualAlloc2FromApp if possible as it is available to Windows store apps pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + if (pVirtualAlloc2 == NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); FreeLibrary(hDll); } hDll = LoadLibrary(TEXT("ntdll.dll")); @@ -170,7 +170,7 @@ void _mi_os_init() { os_alloc_granularity = os_page_size; } if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = 2*MiB; + large_os_page_size = 2 * MiB; } } #endif @@ -210,7 +210,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment #if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; - if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) { + if (addr == NULL && (hint = 
mi_os_get_aligned_hint(try_alignment, size)) != NULL) { return VirtualAlloc(hint, size, flags, PAGE_READWRITE); } #endif @@ -233,7 +233,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; if ((large_only || use_large_os_page(size, try_alignment)) - && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { + && allow_large && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0) { uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. @@ -247,12 +247,12 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, if (large_only) return p; // fall back to non-large page allocation on error (`p == NULL`). if (p == NULL) { - mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations + mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations } } } if (p == NULL) { - *is_large = ((flags&MEM_LARGE_PAGES) != 0); + *is_large = ((flags & MEM_LARGE_PAGES) != 0); p = mi_win_virtual_allocx(addr, size, try_alignment, flags); } if (p == NULL) { @@ -264,8 +264,8 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, #elif defined(__wasi__) static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size(); - uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment); - size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size()); + uintptr_t aligned_base = _mi_align_up(base, (uintptr_t)try_alignment); + size_t alloc_size = _mi_align_up(aligned_base - base + size, _mi_os_page_size()); mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0); if (alloc_size < size) return NULL; if 
(__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) { @@ -278,47 +278,50 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { #define MI_OS_USE_MMAP static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { void* p = NULL; - #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) +#if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { - p = mmap(hint,size,protect_flags,flags,fd,0); - if (p==MAP_FAILED) p = NULL; // fall back to regular mmap + p = mmap(hint, size, protect_flags, flags, fd, 0); + if (p == MAP_FAILED) p = NULL; // fall back to regular mmap } - #else +#else UNUSED(try_alignment); - #endif - if (p==NULL) { - p = mmap(addr,size,protect_flags,flags,fd,0); - if (p==MAP_FAILED) p = NULL; +#endif + if (p == NULL) { + p = mmap(addr, size, protect_flags, flags, fd, 0); + if (p == MAP_FAILED) p = NULL; } return p; } static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { void* p = NULL; - #if !defined(MAP_ANONYMOUS) - #define MAP_ANONYMOUS MAP_ANON - #endif - int flags = MAP_PRIVATE | MAP_ANONYMOUS; +#if !defined(MAP_ANONYMOUS) +#define MAP_ANONYMOUS MAP_ANON +#endif +#if !defined(MAP_NORESERVE) +#define MAP_NORESERVE 0 +#endif + int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; int fd = -1; - #if defined(MAP_ALIGNED) // BSD +#if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { size_t n = _mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB flags |= MAP_ALIGNED(n); } } - #endif - #if defined(PROT_MAX) +#endif +#if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD - #endif - 
#if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) +#endif +#if defined(VM_MAKE_TAG) +// macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) int os_tag = (int)mi_option_get(mi_option_os_tag); if (os_tag < 100 || os_tag > 255) os_tag = 100; fd = VM_MAKE_TAG(os_tag); - #endif +#endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); @@ -332,39 +335,39 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro else { int lflags = flags; int lfd = fd; - #ifdef MAP_ALIGNED_SUPER +#ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; - #endif - #ifdef MAP_HUGETLB +#endif +#ifdef MAP_HUGETLB lflags |= MAP_HUGETLB; - #endif - #ifdef MAP_HUGE_1GB +#endif +#ifdef MAP_HUGE_1GB static bool mi_huge_pages_available = true; if ((size % GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else - #endif +#endif { - #ifdef MAP_HUGE_2MB +#ifdef MAP_HUGE_2MB lflags |= MAP_HUGE_2MB; - #endif +#endif } - #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB +#ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; - #endif +#endif if (large_only || lflags != flags) { // try large OS page allocation *is_large = true; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - #ifdef MAP_HUGE_1GB +#ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); } - #endif +#endif if (large_only) return p; if (p == NULL) { 
mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations @@ -375,7 +378,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro if (p == NULL) { *is_large = false; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); - #if defined(MADV_HUGEPAGE) +#if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE // though since properly aligned allocations will already use large pages if available @@ -387,7 +390,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro *is_large = true; // possibly }; } - #endif +#endif } return p; } @@ -401,18 +404,18 @@ static volatile _Atomic(intptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; - if ((size%MI_SEGMENT_SIZE) != 0) return NULL; + if ((size % MI_SEGMENT_SIZE) != 0) return NULL; intptr_t hint = mi_atomic_add(&aligned_base, size); - if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) + if (hint == 0 || hint > ((intptr_t)30 << 40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode +#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); - init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB - #endif + init = init + (MI_SEGMENT_SIZE * ((r >> 17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 
256GiB +#endif mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } - if (hint%try_alignment != 0) return NULL; + if (hint % try_alignment != 0) return NULL; return (void*)hint; } #else @@ -441,17 +444,17 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo } */ - #if defined(_WIN32) - int flags = MEM_RESERVE; - if (commit) flags |= MEM_COMMIT; - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); - #elif defined(__wasi__) - *is_large = false; - p = mi_wasm_heap_grow(size, try_alignment); - #else - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); - #endif +#if defined(_WIN32) + int flags = MEM_RESERVE; + if (commit) flags |= MEM_COMMIT; + p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); +#elif defined(__wasi__) + *is_large = false; + p = mi_wasm_heap_grow(size, try_alignment); +#else + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); +#endif mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); @@ -561,7 +564,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar allow_large = *large; *large = false; } - return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), tld->stats); + return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large != NULL ? 
large : &allow_large), tld->stats); } @@ -613,7 +616,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ _mi_stat_decrease(&stats->committed, csize); } - #if defined(_WIN32) +#if defined(_WIN32) if (commit) { // if the memory was already committed, the call succeeds but it is not zero'd // *is_zero = true; @@ -624,28 +627,42 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); err = (ok ? 0 : GetLastError()); } - #elif defined(__wasi__) +#elif defined(__wasi__) // WebAssembly guests can't control memory protection - #else +#elif defined(MAP_FIXED) + if (!commit) { + // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) + void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); + if (p != start) { err = errno; } + } + else { + // for commit, just change the protection + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } +#else err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); if (err != 0) { err = errno; } - #endif +#endif if (err != 0) { - _mi_warning_message("commit/decommit error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); + _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); } mi_assert_internal(err == 0); return (err == 0); } bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, false /* conservative? */, is_zero, stats); + return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats); } bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) { bool is_zero; - return mi_os_commitx(addr, size, false, true /* conservative? 
*/, &is_zero, stats); + return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); } +bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { + return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats); +} // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory @@ -657,24 +674,24 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) if (reset) _mi_stat_increase(&stats->reset, csize); - else _mi_stat_decrease(&stats->reset, csize); + else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! - #if (MI_DEBUG>1) - if (MI_SECURE==0) { +#if (MI_DEBUG>1) + if (MI_SECURE == 0) { memset(start, 0, csize); // pretend it is eagerly reset } - #endif +#endif #if defined(_WIN32) // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); - #if 1 +#if 1 if (p == start && start != NULL) { - VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set + VirtualUnlock(start, csize); // VirtualUnlock after MEM_RESET removes the memory from the working set } - #endif +#endif if (p != start) return false; #else #if defined(MADV_FREE) @@ -704,12 +721,22 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. 
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { - return mi_os_resetx(addr, size, true, stats); + if (mi_option_is_enabled(mi_option_reset_decommits)) { + return _mi_os_decommit(addr, size, stats); + } + else { + return mi_os_resetx(addr, size, true, stats); + } } bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); + if (mi_option_is_enabled(mi_option_reset_decommits)) { + return _mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) + } + else { + *is_zero = false; + return mi_os_resetx(addr, size, false, stats); + } } @@ -721,7 +748,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { if (csize == 0) return false; /* if (_mi_os_is_huge_reserved(addr)) { - _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); + _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } */ int err = 0; @@ -753,7 +780,7 @@ bool _mi_os_unprotect(void* addr, size_t size) { bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { // page align conservatively within the range - mi_assert_internal(oldsize > newsize && p != NULL); + mi_assert_internal(oldsize > newsize&& p != NULL); if (oldsize < newsize || p == NULL) return false; if (oldsize == newsize) return true; @@ -781,20 +808,20 @@ and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%GiB == 0); + mi_assert_internal(size % GiB == 0); mi_assert_internal(addr != NULL); const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; mi_win_enable_large_os_pages(); - #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) +#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { - #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE - #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) - #endif +#ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE +#define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) +#endif params[0].Type = 5; // == MemExtendedParameterAttributeFlags; params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; ULONG param_count = 1; @@ -821,7 +848,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) params[0].ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } - #endif +#endif // otherwise use regular virtual alloc on older windows return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } @@ -842,16 +869,16 @@ static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, cons } #endif static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%GiB == 0); + mi_assert_internal(size % GiB == 0); bool is_large = true; void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + if (numa_node >= 0 && numa_node < 8 * MI_INTPTR_SIZE) { // at most 64 nodes 
uintptr_t numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? // see: - long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8 * MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } @@ -883,7 +910,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r >> 17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB #endif } end = start + size; @@ -936,8 +963,8 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (max_msecs > 0) { mi_msecs_t elapsed = _mi_clock_end(start_t); if (page >= 1) { - mi_msecs_t estimate = ((elapsed / (page+1)) * pages); - if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + mi_msecs_t estimate = ((elapsed / (page + 1)) * pages); + if (estimate > 2 * max_msecs) { // seems like we are going to timeout, break elapsed = max_msecs + 1; } } @@ -947,7 +974,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } } } - mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); + mi_assert_internal(page * MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) *pages_reserved = page; if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE; return (page == 0 ? 
NULL : start); @@ -956,7 +983,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { - if (p==NULL || size==0) return; + if (p == NULL || size == 0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); @@ -972,7 +999,7 @@ static size_t mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); - GetNumaProcessorNodeEx(&pnum,&numa_node); + GetNumaProcessorNodeEx(&pnum, &numa_node); return numa_node; } @@ -999,12 +1026,12 @@ static size_t mi_os_numa_nodex(void) { static size_t mi_os_numa_node_countx(void) { char buf[128]; unsigned node = 0; - for(node = 0; node < 256; node++) { + for (node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? 
(but ensure there is no allocation) snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); - if (access(buf,R_OK) != 0) break; + if (access(buf, R_OK) != 0) break; } - return (node+1); + return (node + 1); } #else static size_t mi_os_numa_nodex(void) { @@ -1031,7 +1058,7 @@ size_t _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); size_t numa_count = _mi_os_numa_node_count(); - if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 + if (numa_count <= 1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 size_t numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } From 211f1aa5190f063ee8eef237473281535c2be79f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 14:55:12 -0800 Subject: [PATCH 095/293] remove reset delay slots; add reset tracking per page and segment --- include/mimalloc-internal.h | 8 +- include/mimalloc-types.h | 28 +--- include/mimalloc.h | 3 +- src/arena.c | 8 +- src/bitmap.inc.c | 54 ++++++-- src/init.c | 11 +- src/memory.c | 199 +++++++++++++++------------ src/options.c | 5 +- src/os.c | 204 ++++++++++++++-------------- src/page.c | 7 +- src/segment.c | 264 ++++++++++++++++++++++-------------- 11 files changed, 443 insertions(+), 348 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d727e563..ab295e65 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -59,7 +59,7 @@ size_t _mi_os_good_alloc_size(size_t size); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); -void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld); +void _mi_mem_free(void* p, size_t size, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* 
tld); bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); @@ -75,7 +75,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); void _mi_segment_thread_collect(mi_segments_tld_t* tld); -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size); // page start for any page +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc; @@ -297,7 +297,9 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const // Quick page start for initialized pages static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - return _mi_segment_page_start(segment, page, page->block_size, page_size); + const size_t bsize = page->block_size; + mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0); + return _mi_segment_page_start(segment, page, bsize, page_size, NULL); } // Get the page containing the pointer diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 0ce91339..e816c3a6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -384,31 +384,12 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) - -// ------------------------------------------------------ -// Delay slots (to avoid expensive OS calls) -// ------------------------------------------------------ -typedef 
int64_t mi_msecs_t; - -#define MI_RESET_DELAY_SLOTS (256) - -typedef struct mi_delay_slot_s { - mi_msecs_t expire; - uint8_t* addr; - size_t size; -} mi_delay_slot_t; - -typedef struct mi_delay_slots_s { - size_t capacity; // always `MI_RESET_DELAY_SLOTS` - size_t count; // current slots used (`<= capacity`) - mi_delay_slot_t slots[MI_RESET_DELAY_SLOTS]; -} mi_delay_slots_t; - - // ------------------------------------------------------ // Thread Local data // ------------------------------------------------------ +typedef int64_t mi_msecs_t; + // Queue of segments typedef struct mi_segment_queue_s { mi_segment_t* first; @@ -417,9 +398,8 @@ typedef struct mi_segment_queue_s { // OS thread local data typedef struct mi_os_tld_s { - size_t region_idx; // start point for next allocation - mi_delay_slots_t* reset_delay; // delay slots for OS reset operations - mi_stats_t* stats; // points to tld stats + size_t region_idx; // start point for next allocation + mi_stats_t* stats; // points to tld stats } mi_os_tld_t; // Segments thread local data diff --git a/include/mimalloc.h b/include/mimalloc.h index a59b9cf7..197b1734 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -272,8 +272,9 @@ typedef enum mi_option_e { mi_option_segment_cache, mi_option_page_reset, mi_option_segment_reset, - mi_option_eager_commit_delay, mi_option_reset_decommits, + mi_option_eager_commit_delay, + mi_option_reset_delay, mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, diff --git a/src/arena.c b/src/arena.c index 46741208..4a596b2c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -107,7 +107,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* size_t idx = mi_atomic_read(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around - if (mi_bitmap_try_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { + if 
(mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { mi_atomic_write(&arena->search_idx, idx); // start search from here next time return true; } @@ -137,9 +137,9 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n } else if (commit) { // ensure commit now - bool any_zero; - mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_zero); - if (any_zero) { + bool any_uncommitted; + mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); + if (any_uncommitted) { bool commit_zero; _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); if (commit_zero) *is_zero = true; diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 81f87a79..11ada472 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -104,9 +104,29 @@ static inline size_t mi_bsr(uintptr_t x) { Claim a bit sequence atomically ----------------------------------------------------------- */ +// Try to atomically claim a sequence of `count` bits at in `idx` +// in the bitmap field. Returns `true` on success. +static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_fields, const size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + mi_assert_internal(bitidx + count <= MI_BITMAP_FIELD_BITS); + + mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]); + if ((field & mask) == 0) { // free? + if (mi_atomic_cas_strong(&bitmap[idx], (field|mask), field)) { + // claimed! + return true; + } + } + return false; +} + + // Try to atomically claim a sequence of `count` bits in a single // field at `idx` in `bitmap`. Returns `true` on success. 
-static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); volatile _Atomic(uintptr_t)* field = &bitmap[idx]; @@ -160,9 +180,9 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con // Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. -static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { +static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { for (size_t idx = 0; idx < bitmap_fields; idx++) { - if (mi_bitmap_try_claim_field(bitmap, idx, count, bitmap_idx)) { + if (mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } } @@ -170,39 +190,51 @@ static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, } // Set `count` bits at `bitmap_idx` to 0 atomically -// Returns `true` if all `count` bits were 1 previously +// Returns `true` if all `count` bits were 1 previously. 
static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - mi_assert_internal((bitmap[idx] & mask) == mask); + // mi_assert_internal((bitmap[idx] & mask) == mask); uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask); return ((prev & mask) == mask); } // Set `count` bits at `bitmap_idx` to 1 atomically -// Returns `true` if all `count` bits were 0 previously +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - // mi_assert_internal((bitmap[idx] & mask) == 0); + //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); if (any_zero != NULL) *any_zero = ((prev & mask) != mask); return ((prev & mask) == 0); } -// Returns `true` if all `count` bits were 1 -static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { +// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one. 
+static inline bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - // mi_assert_internal((bitmap[idx] & mask) == 0); - return ((mi_atomic_read(&bitmap[idx]) & mask) == mask); + mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]); + if (any_ones != NULL) *any_ones = ((field & mask) != 0); + return ((field & mask) == mask); } +static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); +} + +static inline bool mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + bool any_ones; + mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); + return any_ones; +} + + #endif diff --git a/src/init.c b/src/init.c index f9735462..468fd46f 100644 --- a/src/init.c +++ b/src/init.c @@ -97,13 +97,11 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) #define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) -static mi_delay_slots_t tld_reset_delay_main = { MI_RESET_DELAY_SLOTS, 0, { {0,NULL,0} } }; - static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, &tld_reset_delay_main, tld_main_stats }, // os + { 0, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -194,8 +192,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { typedef struct mi_thread_data_s { mi_heap_t heap; // must 
come first due to cast in `_mi_heap_done` - mi_tld_t tld; - mi_delay_slots_t reset_delay; + mi_tld_t tld; } mi_thread_data_t; // Initialize the thread local default heap, called from `mi_thread_init` @@ -215,7 +212,6 @@ static bool _mi_heap_init(void) { } mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; - mi_delay_slots_t* reset_delay = &td->reset_delay; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); heap->random = _mi_random_init(heap->thread_id); @@ -226,9 +222,6 @@ static bool _mi_heap_init(void) { tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; tld->os.stats = &tld->stats; - tld->os.reset_delay = reset_delay; - memset(reset_delay, 0, sizeof(*reset_delay)); - reset_delay->capacity = MI_RESET_DELAY_SLOTS; _mi_heap_set_default_direct(heap); } return false; diff --git a/src/memory.c b/src/memory.c index b0bcf7a0..94b6348f 100644 --- a/src/memory.c +++ b/src/memory.c @@ -54,6 +54,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, s void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); + // Constants #if (MI_INTPTR_SIZE==8) #define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 48KiB for the region map @@ -73,28 +74,26 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo // Region info is a pointer to the memory region and two bits for // its flags: is_large, and is_committed. 
-typedef uintptr_t mi_region_info_t; - -static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) { - return ((uintptr_t)start | ((uintptr_t)(is_large?1:0) << 1) | (is_committed?1:0)); -} - -static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) { - if (is_large) *is_large = ((info&0x02) != 0); - if (is_committed) *is_committed = ((info&0x01) != 0); - return (void*)(info & ~0x03); -} +typedef union mi_region_info_u { + uintptr_t value; + struct { + bool valid; + bool is_large; + int numa_node; + }; +} mi_region_info_t; // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. typedef struct mem_region_s { - volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) - volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) + volatile _Atomic(uintptr_t) info; // is_large, and associated numa node + 1 (so 0 is no association) + volatile _Atomic(void*) start; // start of the memory area (and flags) mi_bitmap_field_t in_use; // bit per in-use block mi_bitmap_field_t dirty; // track if non-zero per block mi_bitmap_field_t commit; // track if committed per block (if `!info.is_committed)) - size_t arena_memid; // if allocated from a (huge page) arena + mi_bitmap_field_t reset; // track reset per block + volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena- } mem_region_t; // The region map @@ -113,24 +112,32 @@ static size_t mi_region_block_count(size_t size) { return _mi_divide_up(size, MI_SEGMENT_SIZE); } +/* // Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. 
static size_t mi_good_commit_size(size_t size) { if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; return _mi_align_up(size, _mi_os_large_page_size()); } +*/ // Return if a pointer points into a region reserved by us. bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { if (p==NULL) return false; size_t count = mi_atomic_read_relaxed(®ions_count); for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_region_info_read( mi_atomic_read_relaxed(®ions[i].info), NULL, NULL); + uint8_t* start = (uint8_t*)mi_atomic_read_ptr_relaxed(®ions[i].start); if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; } return false; } +static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { + void* start = mi_atomic_read_ptr(®ion->start); + mi_assert_internal(start != NULL); + return ((uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE)); +} + static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); size_t idx = region - regions; @@ -142,13 +149,10 @@ static size_t mi_memid_create_from_arena(size_t arena_memid) { return (arena_memid << 1) | 1; } -static bool mi_memid_is_arena(size_t id) { - return ((id&1)==1); -} -static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { - if (mi_memid_is_arena(id)) { - *arena_memid = (id>>1); +static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { + if ((id&1)==1) { + if (arena_memid != NULL) *arena_memid = (id>>1); return true; } else { @@ -159,6 +163,7 @@ static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t } } + /* ---------------------------------------------------------------------------- Allocate a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ @@ 
-187,16 +192,21 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, // allocated, initialize and claim the initial blocks mem_region_t* r = ®ions[idx]; - r->numa_node = _mi_os_numa_node(tld) + 1; - r->arena_memid = arena_memid; + r->arena_memid = arena_memid; mi_atomic_write(&r->in_use, 0); mi_atomic_write(&r->dirty, (is_zero ? 0 : ~0UL)); mi_atomic_write(&r->commit, (region_commit ? ~0UL : 0)); + mi_atomic_write(&r->reset, 0); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); + mi_atomic_write_ptr(&r->start, start); // and share it - mi_atomic_write(&r->info, mi_region_info_create(start, region_large, region_commit)); // now make it available to others + mi_region_info_t info; + info.valid = true; + info.is_large = region_large; + info.numa_node = _mi_os_numa_node(tld); + mi_atomic_write(&r->info, info.value); // now make it available to others *region = r; return true; } @@ -207,36 +217,33 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { // initialized at all? 
- mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); - if (info==0) return false; + mi_region_info_t info; + info.value = mi_atomic_read_relaxed(®ion->info); + if (info.value==0) return false; // numa correct if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = ((int)mi_atomic_read_relaxed(®ion->numa_node)) - 1; + int rnode = info.numa_node; if (rnode >= 0 && rnode != numa_node) return false; } // check allow-large - bool is_large; - bool is_committed; - mi_region_info_read(info, &is_large, &is_committed); - if (!allow_large && is_large) return false; + if (!allow_large && info.is_large) return false; return true; } -static bool mi_region_try_claim(size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) +static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { - // try all regions for a free slot - const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); + // try all regions for a free slot const size_t count = mi_atomic_read(®ions_count); size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? 
for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around mem_region_t* r = ®ions[idx]; if (mi_region_is_suitable(r, numa_node, allow_large)) { - if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) { + if (mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { tld->region_idx = idx; // remember the last found position *region = r; return true; @@ -252,8 +259,9 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); mem_region_t* region; mi_bitmap_index_t bit_idx; - // first try to claim in existing regions - if (!mi_region_try_claim(blocks, *is_large, ®ion, &bit_idx, tld)) { + const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); + // try to claim in existing regions + if (!mi_region_try_claim(numa_node, blocks, *is_large, ®ion, &bit_idx, tld)) { // otherwise try to allocate a fresh region if (!mi_region_try_alloc_os(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { // out of regions or memory @@ -261,30 +269,28 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo } } + // found a region and claimed `blocks` at `bit_idx` mi_assert_internal(region != NULL); mi_assert_internal(mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); - mi_region_info_t info = mi_atomic_read(®ion->info); - bool region_is_committed = false; - bool region_is_large = false; - void* start = mi_region_info_read(info, ®ion_is_large, ®ion_is_committed); - mi_assert_internal(!(region_is_large && !*is_large)); + mi_region_info_t info; + info.value = mi_atomic_read(®ion->info); + void* start = mi_atomic_read_ptr(®ion->start); + mi_assert_internal(!(info.is_large && !*is_large)); mi_assert_internal(start != NULL); - *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); - *is_large = region_is_large; + *is_zero = mi_bitmap_unclaim(®ion->dirty, 1, blocks, bit_idx); + *is_large = 
info.is_large; *memid = mi_memid_create(region, bit_idx); void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - if (region_is_committed) { - // always committed - *commit = true; - } - else if (*commit) { + + // commit + if (*commit) { // ensure commit - bool any_zero; - mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_zero); - if (any_zero) { + bool any_uncommitted; + mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); + if (any_uncommitted) { bool commit_zero; _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); if (commit_zero) *is_zero = true; @@ -294,6 +300,21 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // no need to commit, but check if already fully committed *commit = mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); } + mi_assert_internal(mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); + + // unreset reset blocks + if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { + mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); + mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); + bool reset_zero; + _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); + if (reset_zero) *is_zero = true; + } + mi_assert_internal(!mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); + + #if (MI_DEBUG>=2) + if (*commit) { ((uint8_t*)p)[0] = 0; } + #endif // and return the allocation mi_assert_internal(p != NULL); @@ -325,7 +346,9 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld); mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0); if (p != NULL) { + #if (MI_DEBUG>=2) if (*commit) { ((uint8_t*)p)[0] = 0; } + #endif return p; } _mi_warning_message("unable to allocate from region: size %zu\n", size); @@ -346,56 +369,56 @@ Free 
-----------------------------------------------------------------------------*/ // Free previously allocated memory with a given id. -void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { +void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { mi_assert_internal(size > 0 && tld != NULL); if (p==NULL) return; if (size==0) return; - + size = _mi_align_up(size, _mi_os_page_size()); + size_t arena_memid = 0; mi_bitmap_index_t bit_idx; mem_region_t* region; - if (mi_memid_indices(id,®ion,&bit_idx,&arena_memid)) { + if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) { // was a direct arena allocation, pass through _mi_arena_free(p, size, arena_memid, tld->stats); } else { // allocated in a region mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; - // we can align the size up to page size (as we allocate that way too) - // this ensures we fully commit/decommit/reset - size = _mi_align_up(size, _mi_os_page_size()); const size_t blocks = mi_region_block_count(size); - mi_region_info_t info = mi_atomic_read(®ion->info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info, &is_large, &is_committed); - mi_assert_internal(start != NULL); - void* blocks_start = (uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE); + mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); + mi_region_info_t info; + info.value = mi_atomic_read(®ion->info); + mi_assert_internal(info.value != 0); + void* blocks_start = mi_region_blocks_start(region, bit_idx); mi_assert_internal(blocks_start == p); // not a pointer in our area? mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? - // decommit (or reset) the blocks to reduce the working set. - // TODO: implement delayed decommit/reset as these calls are too expensive - // if the memory is reused soon. 
- // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large - if (!is_large && - mi_option_is_enabled(mi_option_segment_reset) && - mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use `option_page_reset` instead - { - // note: don't use `_mi_mem_reset` as it is shared with other threads! - _mi_os_reset(p, size, tld->stats); // TODO: maintain reset bits to unreset - } - if (!is_committed) { - // adjust commit statistics as we commit again when re-using the same slot - _mi_stat_decrease(&tld->stats->committed, mi_good_commit_size(size)); + // committed? + if (full_commit && (size % MI_SEGMENT_SIZE) == 0) { + mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, NULL); } - // TODO: should we free empty regions? currently only done _mi_mem_collect. - // this frees up virtual address space which might be useful on 32-bit systems? + if (any_reset) { + // set the is_reset bits if any pages were reset + mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, NULL); + } + + // reset the blocks to reduce the working set. 
+ if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset) && + mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use only `option_page_reset` instead + { + bool any_unreset; + mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); + if (any_unreset) { + _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); + } + } // and unclaim - mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); + bool all_unclaimed = mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); + mi_assert_internal(all_unclaimed); UNUSED(all_unclaimed); } } @@ -416,13 +439,14 @@ void _mi_mem_collect(mi_os_tld_t* tld) { } while(m == 0 && !mi_atomic_cas_weak(®ion->in_use, MI_BITMAP_FIELD_FULL, 0 )); if (m == 0) { // on success, free the whole region - bool is_eager_committed; - void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed); - if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); + void* start = mi_atomic_read_ptr(®ions[i].start); + size_t arena_memid = mi_atomic_read_relaxed(®ions[i].arena_memid); + memset(®ions[i], 0, sizeof(mem_region_t)); + // and release the whole region + mi_atomic_write(®ion->info, 0); + if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); } - // and release - mi_atomic_write(®ion->info,0); } } } @@ -432,6 +456,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ + bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { return _mi_os_reset(p, size, tld->stats); } diff --git a/src/options.c b/src/options.c index 8c4c1707..9b6e4cd0 100644 --- a/src/options.c +++ b/src/options.c @@ -65,10 +65,11 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, 
MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset uses decommit/commit + { 500,UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/os.c b/src/os.c index 02683a02..553d72c9 100644 --- a/src/os.c +++ b/src/os.c @@ -77,11 +77,11 @@ static bool use_large_os_page(size_t size, size_t alignment) { // round to a good OS allocation size (bounded by max 12.5% waste) size_t _mi_os_good_alloc_size(size_t size) { size_t align_size; - if (size < 512 * KiB) align_size = _mi_os_page_size(); - else if (size < 2 * MiB) align_size = 64 * KiB; - else if (size < 8 * MiB) align_size = 256 * KiB; - else if (size < 32 * MiB) align_size = 1 * MiB; - else align_size = 4 * MiB; + if (size < 512*KiB) align_size = _mi_os_page_size(); + else if (size < 2*MiB) align_size = 64*KiB; + else if (size < 8*MiB) align_size = 256*KiB; + else if (size < 32*MiB) align_size = 1*MiB; + else align_size = 4*MiB; if (size >= (SIZE_MAX - align_size)) return size; // possible overflow? 
return _mi_align_up(size, align_size); } @@ -92,8 +92,8 @@ size_t _mi_os_good_alloc_size(size_t size) { // NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) // We hide MEM_EXTENDED_PARAMETER to compile with older SDK's. #include -typedef PVOID(__stdcall* PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); -typedef NTSTATUS(__stdcall* PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); +typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; @@ -129,7 +129,7 @@ static bool mi_win_enable_large_os_pages() if (err == 0) err = GetLastError(); _mi_warning_message("cannot enable large OS page support, error %lu\n", err); } - return (ok != 0); + return (ok!=0); } void _mi_os_init(void) { @@ -144,7 +144,7 @@ void _mi_os_init(void) { if (hDll != NULL) { // use VirtualAlloc2FromApp if possible as it is available to Windows store apps pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2 == NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); FreeLibrary(hDll); } hDll = LoadLibrary(TEXT("ntdll.dll")); @@ -170,7 +170,7 @@ void _mi_os_init() { os_alloc_granularity = os_page_size; } if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = 2 * MiB; + large_os_page_size = 2*MiB; } } #endif @@ -210,7 +210,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment #if (MI_INTPTR_SIZE 
>= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; - if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { + if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) { return VirtualAlloc(hint, size, flags, PAGE_READWRITE); } #endif @@ -233,7 +233,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; if ((large_only || use_large_os_page(size, try_alignment)) - && allow_large && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0) { + && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. @@ -247,12 +247,12 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, if (large_only) return p; // fall back to non-large page allocation on error (`p == NULL`). 
if (p == NULL) { - mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations + mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations } } } if (p == NULL) { - *is_large = ((flags & MEM_LARGE_PAGES) != 0); + *is_large = ((flags&MEM_LARGE_PAGES) != 0); p = mi_win_virtual_allocx(addr, size, try_alignment, flags); } if (p == NULL) { @@ -264,8 +264,8 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, #elif defined(__wasi__) static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size(); - uintptr_t aligned_base = _mi_align_up(base, (uintptr_t)try_alignment); - size_t alloc_size = _mi_align_up(aligned_base - base + size, _mi_os_page_size()); + uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment); + size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size()); mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0); if (alloc_size < size) return NULL; if (__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) { @@ -278,50 +278,50 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { #define MI_OS_USE_MMAP static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { void* p = NULL; -#if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { - p = mmap(hint, size, protect_flags, flags, fd, 0); - if (p == MAP_FAILED) p = NULL; // fall back to regular mmap + p = mmap(hint,size,protect_flags,flags,fd,0); + if (p==MAP_FAILED) p = NULL; // fall back to regular mmap } -#else + #else UNUSED(try_alignment); -#endif - if (p == 
NULL) { - p = mmap(addr, size, protect_flags, flags, fd, 0); - if (p == MAP_FAILED) p = NULL; + #endif + if (p==NULL) { + p = mmap(addr,size,protect_flags,flags,fd,0); + if (p==MAP_FAILED) p = NULL; } return p; } static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { void* p = NULL; -#if !defined(MAP_ANONYMOUS) -#define MAP_ANONYMOUS MAP_ANON -#endif -#if !defined(MAP_NORESERVE) -#define MAP_NORESERVE 0 -#endif + #if !defined(MAP_ANONYMOUS) + #define MAP_ANONYMOUS MAP_ANON + #endif + #if !defined(MAP_NORESERVE) + #define MAP_NORESERVE 0 + #endif int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; int fd = -1; -#if defined(MAP_ALIGNED) // BSD + #if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { size_t n = _mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB flags |= MAP_ALIGNED(n); } } -#endif -#if defined(PROT_MAX) + #endif + #if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD -#endif -#if defined(VM_MAKE_TAG) -// macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) + #endif + #if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. 
(All up to 98 are taken officially but LLVM sanitizers had taken 99) int os_tag = (int)mi_option_get(mi_option_os_tag); if (os_tag < 100 || os_tag > 255) os_tag = 100; fd = VM_MAKE_TAG(os_tag); -#endif + #endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); @@ -335,39 +335,39 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro else { int lflags = flags; int lfd = fd; -#ifdef MAP_ALIGNED_SUPER + #ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; -#endif -#ifdef MAP_HUGETLB + #endif + #ifdef MAP_HUGETLB lflags |= MAP_HUGETLB; -#endif -#ifdef MAP_HUGE_1GB + #endif + #ifdef MAP_HUGE_1GB static bool mi_huge_pages_available = true; if ((size % GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else -#endif + #endif { -#ifdef MAP_HUGE_2MB + #ifdef MAP_HUGE_2MB lflags |= MAP_HUGE_2MB; -#endif + #endif } -#ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB + #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; -#endif + #endif if (large_only || lflags != flags) { // try large OS page allocation *is_large = true; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); -#ifdef MAP_HUGE_1GB + #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); } -#endif + #endif if (large_only) return p; if (p == NULL) { mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations @@ -378,7 +378,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro if (p == NULL) { *is_large = false; p = 
mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); -#if defined(MADV_HUGEPAGE) + #if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE // though since properly aligned allocations will already use large pages if available @@ -390,7 +390,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro *is_large = true; // possibly }; } -#endif + #endif } return p; } @@ -404,18 +404,18 @@ static volatile _Atomic(intptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; - if ((size % MI_SEGMENT_SIZE) != 0) return NULL; + if ((size%MI_SEGMENT_SIZE) != 0) return NULL; intptr_t hint = mi_atomic_add(&aligned_base, size); - if (hint == 0 || hint > ((intptr_t)30 << 40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) + if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area -#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); - init = init + (MI_SEGMENT_SIZE * ((r >> 17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB -#endif + init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB + #endif mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } - if (hint % 
try_alignment != 0) return NULL; + if (hint%try_alignment != 0) return NULL; return (void*)hint; } #else @@ -444,17 +444,17 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo } */ -#if defined(_WIN32) - int flags = MEM_RESERVE; - if (commit) flags |= MEM_COMMIT; - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); -#elif defined(__wasi__) - *is_large = false; - p = mi_wasm_heap_grow(size, try_alignment); -#else - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); -#endif + #if defined(_WIN32) + int flags = MEM_RESERVE; + if (commit) flags |= MEM_COMMIT; + p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + #elif defined(__wasi__) + *is_large = false; + p = mi_wasm_heap_grow(size, try_alignment); + #else + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + #endif mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); @@ -564,7 +564,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar allow_large = *large; *large = false; } - return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large != NULL ? 
large : &allow_large), tld->stats); + return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), tld->stats); } @@ -616,7 +616,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ _mi_stat_decrease(&stats->committed, csize); } -#if defined(_WIN32) + #if defined(_WIN32) if (commit) { // if the memory was already committed, the call succeeds but it is not zero'd // *is_zero = true; @@ -627,9 +627,9 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); err = (ok ? 0 : GetLastError()); } -#elif defined(__wasi__) + #elif defined(__wasi__) // WebAssembly guests can't control memory protection -#elif defined(MAP_FIXED) + #elif defined(MAP_FIXED) if (!commit) { // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); @@ -640,10 +640,10 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } } -#else + #else err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); if (err != 0) { err = errno; } -#endif + #endif if (err != 0) { _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); } @@ -674,24 +674,24 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) if (reset) _mi_stat_increase(&stats->reset, csize); - else _mi_stat_decrease(&stats->reset, csize); + else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! 
-#if (MI_DEBUG>1) - if (MI_SECURE == 0) { + #if (MI_DEBUG>1) + if (MI_SECURE==0) { memset(start, 0, csize); // pretend it is eagerly reset } -#endif + #endif #if defined(_WIN32) // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); -#if 1 + #if 1 if (p == start && start != NULL) { - VirtualUnlock(start, csize); // VirtualUnlock after MEM_RESET removes the memory from the working set + VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set } -#endif + #endif if (p != start) return false; #else #if defined(MADV_FREE) @@ -748,7 +748,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { if (csize == 0) return false; /* if (_mi_os_is_huge_reserved(addr)) { - _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); + _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } */ int err = 0; @@ -780,7 +780,7 @@ bool _mi_os_unprotect(void* addr, size_t size) { bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { // page align conservatively within the range - mi_assert_internal(oldsize > newsize&& p != NULL); + mi_assert_internal(oldsize > newsize && p != NULL); if (oldsize < newsize || p == NULL) return false; if (oldsize == newsize) return true; @@ -808,20 +808,20 @@ and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size % GiB == 0); + mi_assert_internal(size%GiB == 0); mi_assert_internal(addr != NULL); const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; mi_win_enable_large_os_pages(); -#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) + #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { -#ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE -#define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) -#endif + #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE + #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) + #endif params[0].Type = 5; // == MemExtendedParameterAttributeFlags; params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; ULONG param_count = 1; @@ -848,7 +848,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) params[0].ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } -#endif + #endif // otherwise use regular virtual alloc on older windows return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } @@ -869,16 +869,16 @@ static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, cons } #endif static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size % GiB == 0); + mi_assert_internal(size%GiB == 0); bool is_large = true; void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8 * MI_INTPTR_SIZE) { // at most 64 nodes + if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes 
uintptr_t numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? // see: - long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8 * MI_INTPTR_SIZE, 0); + long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } @@ -910,7 +910,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r >> 17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB #endif } end = start + size; @@ -963,8 +963,8 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (max_msecs > 0) { mi_msecs_t elapsed = _mi_clock_end(start_t); if (page >= 1) { - mi_msecs_t estimate = ((elapsed / (page + 1)) * pages); - if (estimate > 2 * max_msecs) { // seems like we are going to timeout, break + mi_msecs_t estimate = ((elapsed / (page+1)) * pages); + if (estimate > 2*max_msecs) { // seems like we are going to timeout, break elapsed = max_msecs + 1; } } @@ -974,7 +974,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } } } - mi_assert_internal(page * MI_HUGE_OS_PAGE_SIZE <= size); + mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) *pages_reserved = page; if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE; return (page == 0 ? 
NULL : start); @@ -983,7 +983,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { - if (p == NULL || size == 0) return; + if (p==NULL || size==0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); @@ -999,7 +999,7 @@ static size_t mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); - GetNumaProcessorNodeEx(&pnum, &numa_node); + GetNumaProcessorNodeEx(&pnum,&numa_node); return numa_node; } @@ -1026,12 +1026,12 @@ static size_t mi_os_numa_nodex(void) { static size_t mi_os_numa_node_countx(void) { char buf[128]; unsigned node = 0; - for (node = 0; node < 256; node++) { + for(node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? 
(but ensure there is no allocation) snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); - if (access(buf, R_OK) != 0) break; + if (access(buf,R_OK) != 0) break; } - return (node + 1); + return (node+1); } #else static size_t mi_os_numa_nodex(void) { @@ -1058,7 +1058,7 @@ size_t _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); size_t numa_count = _mi_os_numa_node_count(); - if (numa_count <= 1) return 0; // optimize on single numa node systems: always node 0 + if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 size_t numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } diff --git a/src/page.c b/src/page.c index 9085ccb5..df6ecc71 100644 --- a/src/page.c +++ b/src/page.c @@ -75,7 +75,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL)); + mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL,NULL)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -229,6 +229,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_internal(page->heap == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); + mi_assert_internal(!page->is_reset); _mi_page_free_collect(page,false); mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); mi_page_queue_push(heap, pq, page); @@ -342,7 +343,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(page->heap != NULL); - + #if MI_DEBUG > 
1 mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); #endif @@ -597,7 +598,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(block_size > 0); // set fields size_t page_size; - _mi_segment_page_start(segment, page, block_size, &page_size); + _mi_segment_page_start(segment, page, block_size, &page_size, NULL); page->block_size = block_size; mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); diff --git a/src/segment.c b/src/segment.c index 549dd339..ffba8c0d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -13,6 +13,8 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_PAGE_HUGE_ALIGN (256*1024) +static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); + /* ----------------------------------------------------------- Segment allocation We allocate pages inside big OS allocated "segments" @@ -40,7 +42,6 @@ terms of the MIT license. 
A copy of the license can be found in the file Queue of segments containing free pages ----------------------------------------------------------- */ - #if (MI_DEBUG>=3) static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { mi_assert_internal(segment != NULL); @@ -143,31 +144,50 @@ static bool mi_segment_is_valid(mi_segment_t* segment) { } #endif + +/* ----------------------------------------------------------- + Page reset +----------------------------------------------------------- */ + +static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { + if (!mi_option_is_enabled(mi_option_page_reset)) return; + if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; + size_t psize; + void* start = mi_segment_raw_page_start(segment, page, &psize); + page->is_reset = true; + mi_assert_internal(size <= psize); + _mi_mem_reset(start, ((size == 0 || size > psize) ? psize : size), tld->os); +} + +static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) +{ + mi_assert_internal(page->is_reset); + mi_assert_internal(!segment->mem_is_fixed); + page->is_reset = false; + size_t psize; + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + bool is_zero = false; + _mi_mem_unreset(start, ((size == 0 || size > psize) ? 
psize : size), &is_zero, tld->os); + if (is_zero) page->is_zero_init = true; +} + + /* ----------------------------------------------------------- Segment size calculations ----------------------------------------------------------- */ -// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size) -{ +// Raw start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) +// The raw start is not taking aligned block allocation into consideration. +static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { size_t psize = (segment->page_kind == MI_PAGE_HUGE ? segment->segment_size : (size_t)1 << segment->page_shift); - uint8_t* p = (uint8_t*)segment + page->segment_idx*psize; + uint8_t* p = (uint8_t*)segment + page->segment_idx * psize; if (page->segment_idx == 0) { // the first page starts after the segment info (and possible guard page) - p += segment->segment_info_size; + p += segment->segment_info_size; psize -= segment->segment_info_size; - // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) - if (block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { - size_t adjust = block_size - ((uintptr_t)p % block_size); - if (adjust < block_size) { - p += adjust; - psize -= adjust; - } - mi_assert_internal((uintptr_t)p % block_size == 0); - } } - + if (MI_SECURE > 1 || (MI_SECURE == 1 && page->segment_idx == segment->capacity - 1)) { // secure == 1: the last page has an os guard page at the end // secure > 1: every page has an os guard page @@ -175,19 +195,36 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } if (page_size != NULL) *page_size = psize; - mi_assert_internal(_mi_ptr_page(p) == page); + 
mi_assert_internal(page->block_size == 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } -static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { - /* - if (mi_option_is_enabled(mi_option_secure)) { - // always reserve maximally so the protection falls on - // the same address area, as we need to reuse them from the caches interchangably. - capacity = MI_SMALL_PAGES_PER_SEGMENT; +// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size) +{ + size_t psize; + uint8_t* p = mi_segment_raw_page_start(segment, page, &psize); + if (pre_size != NULL) *pre_size = 0; + if (page->segment_idx == 0 && block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { + // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) + size_t adjust = block_size - ((uintptr_t)p % block_size); + if (adjust < block_size) { + p += adjust; + psize -= adjust; + if (pre_size != NULL) *pre_size = adjust; + } + mi_assert_internal((uintptr_t)p % block_size == 0); } - */ + + if (page_size != NULL) *page_size = psize; + mi_assert_internal(page->block_size==0 || _mi_ptr_page(p) == page); + mi_assert_internal(_mi_ptr_segment(p) == segment); + return p; +} + +static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) +{ const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */; size_t guardsize = 0; size_t isize = 0; @@ -234,7 +271,15 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_assert_internal(!segment->mem_is_fixed); _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set } - _mi_mem_free(segment, segment_size, 
segment->memid, tld->os); + + bool fully_committed = true; + bool any_reset = false; + for (size_t i = 0; i < segment->capacity; i++) { + const mi_page_t* page = &segment->pages[i]; + if (!page->is_committed) fully_committed = false; + if (page->is_reset) any_reset = true; + } + _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); } @@ -275,7 +320,7 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); - mi_assert_internal(segment->next == NULL); + mi_assert_internal(segment->next == NULL); if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { return false; } @@ -328,31 +373,31 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (page_kind >= MI_PAGE_LARGE); - bool protection_still_good = false; + bool pages_still_good = false; bool is_zero = false; // Try to get it from our thread local cache first - mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); + mi_segment_t* segment = NULL; // mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { - if (MI_SECURE!=0) { - mi_assert_internal(!segment->mem_is_fixed); - if (segment->page_kind != page_kind) { + if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { + pages_still_good = true; + } + else + { + // different page kinds; unreset any reset pages, and unprotect + // TODO: optimize cache pop to return fitting pages if possible? 
+ for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->is_reset) { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? (instead of the full page) + } + } + if (MI_SECURE!=0) { + mi_assert_internal(!segment->mem_is_fixed); + // TODO: should we unprotect per page? (with is_protected flag?) _mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs } - else { - protection_still_good = true; // otherwise, the guard pages are still in place - } - } - if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) { - mi_assert_internal(!segment->mem_is_fixed); - _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->os); - segment->mem_is_committed = true; - } - if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_page_reset)) { - bool reset_zero = false; - _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->os); - if (reset_zero) is_zero = true; - } + } } else { // Allocate the segment from the OS @@ -373,27 +418,42 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - // zero the segment info (but not the `mem` fields) - ptrdiff_t ofs = offsetof(mi_segment_t,next); - memset((uint8_t*)segment + ofs, 0, info_size - ofs); - - // guard pages - if ((MI_SECURE != 0) && !protection_still_good) { - // in secure mode, we set up a protected page in between the segment info - // and the page data - mi_assert_internal( info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - _mi_mem_protect( (uint8_t*)segment + info_size, (pre_size - info_size) ); - size_t os_page_size = _mi_os_page_size(); - if (MI_SECURE <= 1) { - // and protect the last page too - _mi_mem_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size ); - } - else { - // protect every page - for (size_t i = 0; i < 
capacity; i++) { - _mi_mem_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size ); + if (!pages_still_good) { + // guard pages + if (MI_SECURE != 0) { + // in secure mode, we set up a protected page in between the segment info + // and the page data + mi_assert_internal(info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); + _mi_mem_protect((uint8_t*)segment + info_size, (pre_size - info_size)); + const size_t os_page_size = _mi_os_page_size(); + if (MI_SECURE <= 1) { + // and protect the last page too + _mi_mem_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); + } + else { + // protect every page + for (size_t i = 0; i < capacity; i++) { + _mi_mem_protect((uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size); + } } } + + // zero the segment info (but not the `mem` fields) + ptrdiff_t ofs = offsetof(mi_segment_t, next); + memset((uint8_t*)segment + ofs, 0, info_size - ofs); + + // initialize pages info + for (uint8_t i = 0; i < capacity; i++) { + segment->pages[i].segment_idx = i; + segment->pages[i].is_reset = false; + segment->pages[i].is_committed = commit; + segment->pages[i].is_zero_init = is_zero; + } + } + else { + // zero the segment info but not the pages info (and mem fields) + ptrdiff_t ofs = offsetof(mi_segment_t, next); + memset((uint8_t*)segment + ofs, 0, offsetof(mi_segment_t,pages) - ofs); } // initialize @@ -404,13 +464,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); - for (uint8_t i = 0; i < segment->capacity; i++) { - segment->pages[i].segment_idx = i; - segment->pages[i].is_reset = false; - segment->pages[i].is_committed = commit; - segment->pages[i].is_zero_init = is_zero; - } _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + //fprintf(stderr,"mimalloc: alloc segment at %p\n", 
(void*)segment); return segment; } @@ -463,24 +518,22 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (!page->segment_in_use) { - if (page->is_reset || !page->is_committed) { + // set in-use before doing unreset to prevent delayed reset + page->segment_in_use = true; + segment->used++; + if (!page->is_committed) { + mi_assert_internal(!segment->mem_is_fixed); + mi_assert_internal(!page->is_reset); size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - if (!page->is_committed) { - mi_assert_internal(!segment->mem_is_fixed); - page->is_committed = true; - bool is_zero = false; - _mi_mem_commit(start,psize,&is_zero,tld->os); - if (is_zero) page->is_zero_init = true; - } - if (page->is_reset) { - mi_assert_internal(!segment->mem_is_fixed); - page->is_reset = false; - bool is_zero = false; - _mi_mem_unreset(start, psize, &is_zero, tld->os); - if (is_zero) page->is_zero_init = true; - } + uint8_t* start = _mi_page_start(segment, page, &psize); + page->is_committed = true; + bool is_zero = false; + _mi_mem_commit(start,psize,&is_zero,tld->os); + if (is_zero) page->is_zero_init = true; } + if (page->is_reset) { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? + } return page; } } @@ -503,22 +556,21 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - // reset the page memory to reduce memory pressure? 
- if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) - // && segment->page_kind <= MI_PAGE_MEDIUM) // to prevent partial overlapping resets - { - size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - page->is_reset = true; - _mi_mem_reset(start, psize, tld->os); - } + // calculate the used size from the raw (non-aligned) start of the page + size_t pre_size; + _mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); + size_t used_size = pre_size + (page->capacity * page->block_size); - // zero the page data, but not the segment fields + // zero the page data, but not the segment fields page->is_zero_init = false; ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); page->segment_in_use = false; segment->used--; + + // reset the page memory to reduce memory pressure? + // note: must come after setting `segment_in_use` to false + mi_page_reset(segment, page, used_size, tld); } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -568,7 +620,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { // remove the segment from the free page queue if needed mi_segment_remove_from_free_queue(segment,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); - + // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); @@ -628,6 +680,8 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (page->segment_in_use) { + mi_assert_internal(!page->is_reset); + mi_assert_internal(page->is_committed); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); @@ -636,7 +690,7 @@ bool 
_mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_page_clear(segment,page,tld); } else { - // otherwise reclaim it + // otherwise reclaim it _mi_page_reclaim(heap,page); } } @@ -666,8 +720,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_page_t* page = mi_segment_find_free(segment, tld); - page->segment_in_use = true; - segment->used++; + mi_assert_internal(page->segment_in_use); mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity) { // if no more free pages, remove from the queue @@ -685,7 +738,11 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segment_enqueue(free_queue, segment); } mi_assert_internal(free_queue->first != NULL); - return mi_segment_page_alloc_in(free_queue->first,tld); + mi_page_t* page = mi_segment_page_alloc_in(free_queue->first,tld); +#if MI_DEBUG>=2 + _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0; +#endif + return page; } static mi_page_t* mi_segment_small_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { @@ -706,6 +763,9 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_ segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; +#if MI_DEBUG>=2 + _mi_segment_page_start(segment, page, sizeof(void*), NULL, NULL)[0] = 0; +#endif return page; } @@ -717,7 +777,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld segment->used = 1; segment->thread_id = 0; // huge pages are immediately abandoned mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; + page->segment_in_use = true; return page; } From 049dbf41bacbf8a839551cd3e7710ffa1925b770 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 
2019 15:44:07 -0800 Subject: [PATCH 096/293] fix commit bits for huge page allocations --- src/memory.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/memory.c b/src/memory.c index 94b6348f..214bf0d3 100644 --- a/src/memory.c +++ b/src/memory.c @@ -181,6 +181,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld); if (start == NULL) return false; mi_assert_internal(!(region_large && !allow_large)); + mi_assert_internal(!region_large || region_commit); // claim a fresh slot const uintptr_t idx = mi_atomic_increment(®ions_count); @@ -194,8 +195,8 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t* r = ®ions[idx]; r->arena_memid = arena_memid; mi_atomic_write(&r->in_use, 0); - mi_atomic_write(&r->dirty, (is_zero ? 0 : ~0UL)); - mi_atomic_write(&r->commit, (region_commit ? ~0UL : 0)); + mi_atomic_write(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); + mi_atomic_write(&r->commit, (region_commit ? 
MI_BITMAP_FIELD_FULL : 0)); mi_atomic_write(&r->reset, 0); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); @@ -291,6 +292,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo bool any_uncommitted; mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); if (any_uncommitted) { + mi_assert_internal(!info.is_large); bool commit_zero; _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); if (commit_zero) *is_zero = true; @@ -304,6 +306,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // unreset reset blocks if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { + mi_assert_internal(!info.is_large); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); bool reset_zero; From 1674d551ffe5dfffd978737786fe8f94ec7b258c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 20 Nov 2019 20:45:31 -0800 Subject: [PATCH 097/293] add verbose message with secure build level --- src/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/init.c b/src/init.c index 081e7ce7..81413aa9 100644 --- a/src/init.c +++ b/src/init.c @@ -470,6 +470,7 @@ void mi_process_init(void) mi_attr_noexcept { #if (MI_DEBUG) _mi_verbose_message("debug level : %d\n", MI_DEBUG); #endif + _mi_verbose_message("secure level: %d\n", MI_SECURE); mi_thread_init(); mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) } From 74dbfc30bebc2e7e48e88edf3cf66b35c057b16f Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 15:21:23 -0800 Subject: [PATCH 098/293] improved security by encoding NULL values; double free mitigation on by default; more precise free list corruption detection --- CMakeLists.txt | 15 ++++-------- include/mimalloc-internal.h | 48 ++++++++++++++++++++++++++----------- include/mimalloc-types.h | 6 ++--- src/alloc.c | 4 ++-- src/page.c | 6 ++--- 
test/main-override-static.c | 6 ++--- 6 files changed, 49 insertions(+), 36 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aa9c126f..467fad95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,8 +7,7 @@ set(CMAKE_CXX_STANDARD 17) option(MI_OVERRIDE "Override the standard malloc interface" ON) option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" ON) option(MI_DEBUG_FULL "Use full internal heap invariant checking in DEBUG mode" OFF) -option(MI_SECURE "Use security mitigations (like guard pages, allocation randomization, and free-list corruption detection)" OFF) -option(MI_SECURE_FULL "Use full security mitigations, may be more expensive (includes double-free mitigation)" OFF) +option(MI_SECURE "Use full security mitigations (like guard pages, allocation randomization, double-free mitigation, and free-list corruption detection)" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF) option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) @@ -70,15 +69,9 @@ if(MI_OVERRIDE MATCHES "ON") endif() endif() -if(MI_SECURE_FULL MATCHES "ON") - message(STATUS "Set full secure build (may be more expensive) (MI_SECURE_FULL=ON)") +if(MI_SECURE MATCHES "ON") + message(STATUS "Set full secure build (MI_SECURE=ON)") list(APPEND mi_defines MI_SECURE=4) - set(MI_SECURE "ON") -else() - if(MI_SECURE MATCHES "ON") - message(STATUS "Set secure build (MI_SECURE=ON)") - list(APPEND mi_defines MI_SECURE=3) - endif() endif() if(MI_SEE_ASM MATCHES "ON") @@ -92,7 +85,7 @@ if(MI_CHECK_FULL MATCHES "ON") endif() if(MI_DEBUG_FULL MATCHES "ON") - message(STATUS "Set debug level to full invariant checking (MI_DEBUG_FULL=ON)") + message(STATUS "Set debug level to full internal invariant checking (MI_DEBUG_FULL=ON)") list(APPEND mi_defines MI_DEBUG=3) # full invariant checking endif() diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h 
index 73849337..452f0b68 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -275,14 +275,20 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { return segment; } -// Get the page containing the pointer -static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { +// used internally +static inline uintptr_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) { // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE); uintptr_t idx = (uintptr_t)diff >> segment->page_shift; mi_assert_internal(idx < segment->capacity); mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); + return idx; +} + +// Get the page containing the pointer +static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { + uintptr_t idx = _mi_segment_page_idx_of(segment, p); return &((mi_segment_t*)segment)->pages[idx]; } @@ -373,53 +379,67 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { // ------------------------------------------------------------------- // Encoding/Decoding the free list next pointers +// Note: we pass a `null` value to be used as the `NULL` value for the +// end of a free list. This is to prevent the cookie itself to ever +// be present among user blocks (as `cookie^0==cookie`). 
// ------------------------------------------------------------------- static inline bool mi_is_in_same_segment(const void* p, const void* q) { return (_mi_ptr_segment(p) == _mi_ptr_segment(q)); } -static inline mi_block_t* mi_block_nextx( uintptr_t cookie, const mi_block_t* block ) { +static inline bool mi_is_in_same_page(const void* p, const void* q) { + mi_segment_t* segmentp = _mi_ptr_segment(p); + mi_segment_t* segmentq = _mi_ptr_segment(q); + if (segmentp != segmentq) return false; + uintptr_t idxp = _mi_segment_page_idx_of(segmentp, p); + uintptr_t idxq = _mi_segment_page_idx_of(segmentq, q); + return (idxp == idxq); +} + +static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t cookie ) { #ifdef MI_ENCODE_FREELIST - return (mi_block_t*)(block->next ^ cookie); + mi_block_t* b = (mi_block_t*)(block->next ^ cookie); + if (mi_unlikely((void*)b==null)) { b = NULL; } + return b; #else - UNUSED(cookie); + UNUSED(cookie); UNUSED(null); return (mi_block_t*)block->next; #endif } -static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) { +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) { #ifdef MI_ENCODE_FREELIST + if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } block->next = (mi_encoded_t)next ^ cookie; #else - UNUSED(cookie); + UNUSED(cookie); UNUSED(null); block->next = (mi_encoded_t)next; #endif } static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { #ifdef MI_ENCODE_FREELIST - mi_block_t* next = mi_block_nextx(page->cookie,block); + mi_block_t* next = mi_block_nextx(page,block,page->cookie); // check for free list corruption: is `next` at least in our segment range? - // TODO: it is better to check if it is actually inside our page but that is more expensive - // to calculate. Perhaps with a relative free list this becomes feasible? 
- if (next!=NULL && !mi_is_in_same_segment(block, next)) { + // TODO: check if `next` is `page->block_size` aligned? + if (next!=NULL && !mi_is_in_same_page(block, next)) { _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); next = NULL; } return next; #else UNUSED(page); - return mi_block_nextx(0, block); + return mi_block_nextx(page,block,0); #endif } static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { #ifdef MI_ENCODE_FREELIST - mi_block_set_nextx(page->cookie,block,next); + mi_block_set_nextx(page,block,next, page->cookie); #else UNUSED(page); - mi_block_set_nextx(0, block, next); + mi_block_set_nextx(page,block, next,0); #endif } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 893dcd67..9c5d3c19 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -26,16 +26,16 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 1 // guard page around metadata // #define MI_SECURE 2 // guard page around each mimalloc page // #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free) -// #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. (cmake -DMI_SECURE_FULL=ON) +// #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 0 +#define MI_SECURE 4 #endif // Define MI_DEBUG for debug mode // #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free. 
// #define MI_DEBUG 2 // + internal assertion checks -// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_CHECK_FULL=ON) +// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON) #if !defined(MI_DEBUG) #if !defined(NDEBUG) || defined(_DEBUG) #define MI_DEBUG 2 diff --git a/src/alloc.c b/src/alloc.c index c4863115..e68b48d2 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -157,7 +157,7 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* n = mi_block_nextx(page->cookie, block); // pretend it is freed, and get the decoded first field + mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? 
{ @@ -242,7 +242,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_t* dfree; do { dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap->cookie,block,dfree); + mi_block_set_nextx(heap,block,dfree, heap->cookie); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } diff --git a/src/page.c b/src/page.c index a8115d27..437cd0a5 100644 --- a/src/page.c +++ b/src/page.c @@ -283,7 +283,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // and free them all while(block != NULL) { - mi_block_t* next = mi_block_nextx(heap->cookie,block); + mi_block_t* next = mi_block_nextx(heap,block, heap->cookie); // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet @@ -291,7 +291,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { mi_block_t* dfree; do { dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap->cookie, block, dfree); + mi_block_set_nextx(heap, block, dfree, heap->cookie); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } @@ -356,7 +356,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { #if MI_DEBUG>1 // check there are no references left.. 
- for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap->cookie, block)) { + for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) { mi_assert_internal(_mi_ptr_page(block) != page); } #endif diff --git a/test/main-override-static.c b/test/main-override-static.c index 19712411..b04bfeef 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -15,9 +15,9 @@ int main() { mi_version(); // detect double frees and heap corruption - //double_free1(); - //double_free2(); - //corrupt_free(); + // double_free1(); + // double_free2(); + // corrupt_free(); void* p1 = malloc(78); void* p2 = malloc(24); From bc1ff7e7fd5b3822d36b06fcfb532efd422286ef Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 15:28:10 -0800 Subject: [PATCH 099/293] update windows redirect, issue #170 --- bin/mimalloc-redirect.dll | Bin 46592 -> 55808 bytes bin/mimalloc-redirect.lib | Bin 2874 -> 2874 bytes bin/mimalloc-redirect32.dll | Bin 33792 -> 39424 bytes bin/mimalloc-redirect32.lib | Bin 2928 -> 2928 bytes 4 files changed, 0 insertions(+), 0 deletions(-) diff --git a/bin/mimalloc-redirect.dll b/bin/mimalloc-redirect.dll index a1daf316b56cb3c885dc48f1c5fd1ef0ffa302b9..77e40c1391dc4e3f6fa08c30c1fe461df1b0fa80 100644 GIT binary patch literal 55808 zcmeHw3w)eqneRIxv1w_V01X61_}WhCU}>3Bg!Cqpl9aEQwnG}&h}FzAnKmQ2408zs zr`6Q7Yq}wOBI0kmuCD1ly+`+0iXvyg+6G0eoLx8W>M@7ab0$0GSdSNitLFTl_wvm* z-%Qdp2M;7petEz5_Po#SectE2e3x1GNH@ERF_w!kGQ!vy#PsLl*Rd}bP(ACGug_xt zW%~E#oUv7YZ%%cCKcoeMEn9=$Cauof+}sk@Hv6<-q*?PfYvmg@YE3QmzQqLvGwc%e z65pqP7RkN%tP1b-ym7jf=ihjCJK|q7#ZL!#eDQRc$Jae8;{VO_RsOmLYO6SPS1?xp zm($qug+Kh5D!atyYFFpXDq@eITy@wYiV-`a#DF=T!%!7<* zq@wh^C@G3E_6I=sblaGN8u!@PO2PwbnnJf&~~W?`WHrV-i-Yyvu$Qrd87Ke`Y$n3T5lnwBn8RWL#wve-;CziO0IVZ? 
z&?(3F!!)?nsU_LmEA%(C<-Lpz{Zhka@C*_rvc+;bByFD<~Oi?Z<>aS6fDhF%~VADNEJ zS#-tFm#@aui(7>qC*FR zPJMT-of4(Qb3Ub9lqwRbk$%d!^!9D`A{Isq0*2s)r>A!S7`c&{!jR9?K$3Wl8apAG zBaMv=l8T~q{z{0d#v)MbHoJpGWVU1_(icrjdHWEQ@d$W8qH(imhb8NX~K zA(yWM=AzVJqMy&j@MA1-wMCImeOazORHBrv=~wmZoi7qIj*k4qYvZGxBqOw#H`GQ( zlbNRIRYm9pwksCCPv5cX_WNOIXcoE~R`tH~WQ{Y?|J5+-tA3z&en;;tE&vD_!%P@V z9z%vJGw-)+Y%K9bwE8)w1laKZL|)kNmFi6yOnCTNd*7g1cp!udQd zYQ3fKg>t(Ga?BvTdEzuhf5jR9Eux{vZ~?bf;7NydmkJcE)k|L^b8zc#hV31SiiB!yq2KK~EnY)K_M8F>ZB9NvaK$Sajz`{l=Crvav3tp=L{e{%E#D$Y+ z2lQn~g>HrJ`%ueueZ!vWm7RHW(XeviOKUqXRCeVR@P@gz zvX1hikqiyrI7^7D?Q=S2+lD@lp$e)?iLz8O(DH*hE*{ zk5b|`zhgGMT3#u#!j2bHsw?jn0j42GR-8!=lU1T4^9%VPT1lEI5^3JSR89^QWz$!@ z7`ch+4Zq2OW%m{(&hR=D{K6MA(N)?p(WhAmH1zoLVdx#Uh2F!3`cbC}S9}*LFkz5K z)u0c?J*tKT6V8Gv?3Cpdb^?Kw*zM@VjFXjO!W5fbT`8Hc9=|b{k<$eam{K5E;R`5* zd%aNcsFlN<{U;z%oLl||@N7Xz{ z1I87;T~GyAUcr^14wm8X;_E~iw5hG=7yal`TnQvJF#>Vo83WIa1|B|%ay)qk9{a+6 zc!KdO*raPpTIJ*Y|6AGroCRV>6RsjXMZI5m`t^+n<*JTu0uo4$VFe%-|)^T@yJV*Ow; zYPo)}54{BIRv>%{G1MnZ+$=I}X>4dOWx37rMMJ6y^3E2k3GP63X$9QM-MUy!xcPiR zW<3Y?eisb~Ej|Oo3dW9J!D<9G@waH0y%0lJI9BwB#f-z-VAY{<3^3PXO3XoGrkJ)0 zKjVcYfZaT}g#v0BG$i?JRPh ziZC~iKKpb3+KO5X9HG_@fWcbo-0y?a$lWs96}M{1i{deKf>S-a+M& zyrB_{e?rfX1eV6yhi!#Be?(AHk>x~yuhp+e0iR9caB>`XN>yEf64xbhEZMeRaU+re-B&E%Z}{Nn8hg^vSYuLSWmk-Ge-dLTa+lt@)836v(%WO$rOLD_>vRnh z*EQtzYvQl{3O32?VN)2!!0M4o3Pk2&0lBLdLn#0X!dpdV zaq}q>=5wc0!bZchRjcp}*q2U+RDA8B6R2K?B4XA}qgpE`TfP%lZdezn!V~{7N$~{$ zOGJJ{tM!p>WAP1TKo{jOv8A;7DQDZSq4XjQxAHd!s48(S;amcrJDt;nK`$ki$}_UG z^y|dD-q{~t3ziam|KW^p{|(r?>G#k>SKbevqxyo^mKl4Z=vXVeQC;+ut{BaKkK(d@ zI0TMIe>XA$mwnBz$KkTMzozLy5ZZBZ03!gV7e>V3pyP;bg}a`}V{A*=nOvLR^^846 z9oto~glMgL!&pL_78TT(uasdU8YMKi;!-PuB0}){iv|L*iOUCdheF^mAqbZm2R?-_ z?93%DV2d}JIQl6LY2#l6LLC(}fNm!|d;r~!1ljfNVgcl&EU$1XQskiL5U*HO6z=+K z)LZ%OCiIm_esLryk=&(_+-2r>o{6Yc-a~@bL6|Ai^u+m;u2AY7dB6W!Ix~GA z1vB;}GnGcmO4YK`%*dLF7BFwIBXoajtW@&MUtY2okMK&n2dqlQfW_v{TDVgo1?D?JcKE(9LC%d_oU5ZHY zRbBvK1v3c#;_Kt!19KGCn%nGAR=AVT(p|+56E@6?(fC{rkypG92N~_F3J|gI^)%Y& 
z!By~eDM$3n|3v~7AM!Bv90?x33hjV60K5SvCce37aWgfg#7w9nA5`M!(DGc~aag!5 zaT`JBM6O3&CDk289gK|bPq75G_)98%2BlbrKZzzlGz-Q5nTlUSF$da9xfTKRkN~;? zyg!H}meLTNeDg^0YUD9VhxHw22T+imLf_H#z)O}JH)2|)y9}@iC;Bb^k3WZr!6tFb z!G`YtPY;kqj(Up#_fi}%$YR{0T>5f6LTyuk#l4o|-(V^g23rLX>C`l`VD4!&vOF+= z+X&?CGR@RLdzqsnu2)HK7tzC>fW=?`*`Ji2bw9b|BKa&@$Ob5GkMWyKPGj#_*@bgk zIBgJ;ON{nBP3%{+N8b+xh${b5$TM)Uhs=*wF)M{E(VQr07b_Grna88Jw zF`W|*et<}#MP;W88~kc)KvB+jJC2c}!qYJ*d^!g>=PJUU^(^r?v4NO!eubS0PVb2! zzGG{kR4LyCR)SM7SNxMd;?U#e_=8{&bq+}BVB4-E=j4J)LP26y;pHA?oM2-@4$$r#z8v73k&W%3WyloOy)-N zP{%b;XFByC3{s8zj1r<&V>NeEB8!GZOGR2NRr_L$WpbhhIxY1h#H zpWgNH8@$#**?zIXILrwblF)qW$DK0#iTU?)gJOnuksriqBz65P=#F~1O+vZi+awe} zv&4FspyE`kj*4)DOJRo#E!B7P^R|F7M9{5#k4%?37w$VyPm8-aq;={7H=G6x6v{r6 zPpBV_;YbAs0G;KwfCb(`@Q(IU7KY=XRCFc_e}pGHUEm}IxklxhPSCfCB@Hg2ns(pQha@_$y;tGv(A%D zjy5@{ZDRvjF#0k;F&}|np07r%!|<>lFg&bNTDpsX9q}lDh3kf6a|qY%|9D1bO3A;1 z%J}qT@~1m4@+Bvj?B!z1qsTYy#Q9Ep4;50!h|WXGMJboEBY7xKAUaj(%8oFWsVO~& z04=(e9hu;9Tt{9cs$pgNI>A!5(*;U6#z~rIjp4Aw?Fr)^r-U!zj;FBw%Mt}YxN>&p zc3ALuXo|w+DK2Py!%z4Oy;Wktv2+U61}et|#$5Ppu1?;`Awh9pE(J6|jue$}$ep=6 zER|7e6irMUDia6sJI<22$Ja}5pWjN_H@I}gZ}_orJACd}NGioogK+Dlx{=~jA2nNc-q191gxE)5GaLWJzY30ZYQTF~A1VH%r2#$Y zMe62@72aDlWS88|9AqV>g*(5h42}8;h53a$mEoWTH+3iBKn9B%Ztif+Sfb9NQdB{I z`ffd6IBeP@@e#qN&V0&%S`rH~3X8xg;jAbf9Hjz*0|x63_r+eJ z@tfB}SF=2j*|Ff*aQ#_;_+)nVLuJANArijJ1E$?UR&>N&c+P3Ar zu5vUA;ob`V2MXf92O?9mnL@{Cq$i}l+hdx^x1gqhU>c~*LCX|$r9uNjB*h9YkmGLTJ(+hOL{9y`RN{e30!f zh;|p-Mm~lgHHt$=sD?J}^l;eNkAo=xVS3mNq%_?1v#m-1N#stoc;>4ZUeO-Qp@+Tj zIGown4;=vb(3fygEnqGR)exb&T~!EFow>I5SRO>krBnV~4lE|C#HK`zf;Ax-Atd*z z3KKuW^CJ#boWt?7<7_iBTa~>>;0D1&4qpV7ylp+M^v5RvIJXiqF{nLSf~|;3=QG>| zohNe`hER*Mva<}Z7eryFo*ysxAv(N$)Q!5d8Pj@0Zu+qvoGrYEg&?1h1uLyNiH{IW z>8w_*?943aQSLGeZh8PC=hc4#X&6wcsFB{8oTm~eg=uoVoe#pV_Qz|e@R&bqQog2AEa7pTl-v_1>+SmC~h!^}HY)z&1Ff*VC{E~ZjjM=v-^mWpSThe!< z)6h|QL;uRvLyk^PIdCZO7(}AR^dK4aGxsgL!;vZ_GuHU*81~{Reo`{lYw@*UR9PY~ za$x&ulyEbqlKBdTTd?Ci?fjBsiJLHZpgDW@Y+GX4*YgYl@jqlBFa)-=_IMsYwb37a 
z8Fa7?-<><-vM#~YTbEC!=#$TI*nH8#{$^5|i)I&Y?^^un|3H5sE4A64dBs1<{?Add z^kq7A(Mbvy@9p@K|27WWOW#UHlM@?XbKv2T81d!S{kRNstzxrIF?o@~o&SQ4NUyu9 zUp`St=v;uN^DkHK_PJyQF09ruM(}VDEX?MDg^@D&T;xjpGY2f}+D>@6OzlwzE8O)A zk)(Vj<}d6bI(YsmdmG}oeL5<^rH4$`!@|B)GUv-}r2yl%a81OaEDw6f<|6V+_t2rM zgm;XXO0hR-qo$md<{wpSd3tkNyC+Qh$UB93nn&H=Q9D?);8|tL)5|thyX&yq40) z1U1a`^ss2*&d;f47-AUivn44PS5b}LIh{ssirPpXvj5alY5qxhfPmYCkO!O)QBcN1 z;X-ecNhO>cXPpHTQTBULA>qAC_%g)H{wm{w*$LOUOw zc2wDOw`0A95{Lxl(>CuApu{mOZSblS-%qT^F{x$)1_$nz3Ji{se(Dooz!~Wr!r(|^ zs5da=QA?J1l@3ug98wZSp_}pU`Npz0zLYd8N%)Ix)4-M|ciXf(V?#8sXk2-elY&`b zxCw(IuEj)1dMuDG%0b<|OfTZfB^L>7H7s%qnI?gku*h{hO>n#QA~bysP*9JSiTc-n^op``2v4)Cl zks@mCR$k31HawKqNT%c%kQ*>ML8K#gj;HZWGG-8$oO4#G?=%aiPrWiS^0rv;SiL8o zFU5*KpWg@}`vGHkk{I&Styl<*+2jsoJ2(gg1jd*T^};}}gXW%fL33|?7pnrCW3j|7 z5}Y4I2#cqoV5gw~Nw4H_WAXf}6)tJGf!ICGfd-=iQ7Vu!^PCW||&pm@?~` zlON?`wxx9yO|@YsO|z!LtNJm=LPGRqe>?|nM~Fy9+(j5qa=YLok;pibWE^r*cMwL0 zg-lN3$22M&taXK@I)&h8v0xDd#gOG@BA+yMB)+NoAUS@=-f4~9@1xc7_c1a}3QDb` z3JwWzpin_vd$_Gb$NemfN7jsj&e{F2mLIraEziF!#(@OwoY^0DQ4tNG#351_kkP38 z20?!bRaoP#BlE-jP3!aP=wOy-y9(~X!<_Ml0b_c4Ao4y^ScAM1{dvOjtA31O;}H1r zd)3i&WG+uv6`#6bLG+~dHc%vyG!jT-Vewq;(C=V4m=60^kqES8Cpfs~1t|Q6EY#!o zQsb`TFYp4`BW=(LS7IAV4;XD-$+r8AwpWR^er$?)z!>hV8jHb@$&gYm#%Cc^(t#PIo?ao-6)0=b$g7;QeWf%_Stb z;v`&GK`v<`$RPzl5r6Maq_I}O(KMwy-9-ZFY7)*s%oaF4j zhM1pJSa-82`gDj!9kNkdhDMp-Yb3lo1AO}UkwoUnM(@ke$YdRv=*96qCJ8l&*eaA& zehK|W?vGHF4-VSpFcTd(ytX#;Iqg>jjZ~ep1GxHUrwjr9Eo{T zgUtg8^uquEbVzEpm#N~#})=s5WG zkJ6M2oq^xK9SeTE{N6Q+y^n+68%Jq68ozIiH9j-gpRk>Oz#26}uZ$+2$sbG~OFvw` z3`DK{`RmN&G5(dkTkQesS1XcVjDjR-&ST(9b#gx&1=SVt^V3m~T%I2|X?e(nqdV*e zNm9OQ)v32!@ox*v=_e!^R4w}V-!k`-1wPHcW|G$oH2Lc>-=z(DSNtDwo+2hJK4G}x zpQ8hoTN5}OSoIh5j9<8fTc6?ZM~cf(eAy|5>pPVmCQ95Rcu$`#aeo5aQ|jJ_4eD~a z{c~)t03hi9^p=eDUq&&}&$x~=nOsUTIlj$I@slApPO8t*o&zkoA#z9r zH~ji0zrMw<|AFh35X!=4HIUUnRs&fLWHpf0Kvn};4P-Tt)j(DQSq)@0kkvp|1AmSh zz&Ta+@156x`c^tUe6?+^Ow!kbzm;Z04U z*1AQ}yYJdm=i695)hl{b|7qFob;NirB6qcnH=AODr}E+) 
zR%~0rY%_D%OxrqUyC#QSW4lh!c?tCj-wiqJ2HWQa9V(~xgzxGccD3z;LXLgNU!Q}Y zI9QadC;16Z;UPI!%k~sg{?jr)SLQd%nCeLnRIl(*OnIe0!b@l;Q+!Jgzq8{ zh8+CDOfE~C`xe(XHlnna&xqb|OR%=SrP-HS;&1kcsYK0}we|jxcXK0$GRl0-qZHMK zeZeN;A^VrrKvn~pG~nc8l%L2$8oBhpO6+|ZL zjICR{uB@_h!+o{oYd4myuB@o7swiJuRdFBxa3Pwc)o*yXqN-|bd4)u`WJPt=LlrAN zQn7Kx27J6A316{36FTNxv8JqYW5tT~8&+&sv&IAuw5vC4RA`fU+*NBgRIROE`$$D? z<%Y8I+6`6Z6;+IlO4nUheV<;tcAa}eRrSU(;MQ!Yy3Zu772o=nR_#tF{+BG{l5URp z8|#;A^FvZ@)gRo6qDYA@~)=xkgqNhRHbRhtv(WjRcHZkxUPW`nD=bmV3t|$9~j-&pjNR#%7NK-%y`JV*p`Ad-ag84@ zG?}O7-GUV*sLjwc2b@=!CdnrVuF>E8sBk9MQUMQok!B_Vvv9+Zos2Nh3?Q)Bs3|Yu4MiWQrZmAo%a%wA)YRPG`vuXBQOcTZBebt$iBf^uJ) zq;f~4yxkL&TdSPd_;`nxYphqw+do0MJ(E=KK_&0ECf08~lT_{!DR1cn@x}`zN8I^IH zMeeD(Zt0eC_Q?2*l3%XV2B+vec0tNHDC1!n_er^nHcU-_S9DIurODVSV~>ePO2PQOpDJ;`lygAFmt^deat}_?e$W4o^ye~m$hc3+>zShc z?UHiV$XMBNY>LXOfKIg7q4`qv@j zslFd`NjYm|9F?(4+PQ0r+W8(S=NTERao;WFR{5zG4@h~3WnA)#u=fj6ZqF3$Z2Zl-p}V*yzA>%ulOpu;*DPu-U$CBN-cj;Sc!mUreb<$*k9Z1 zdz{`yZwR(LuE-D_Q?Y>ahfrPH;PVE+<5s-lYd~|p(yECS!Y}oMx4^eH`|3rbv|N<$3nR7Wh8e46JH=w3aJESvBWp54DmQ2GKC@_)wWB$AV;CYtdxZO!0X>3F% z&CZk|DttuAkWP@Lc1m7eLimjm`8^)<`|GGqvr|zofR!$lY5;F75_Bx3qr%DF7H{8k zyD{`kh}0l9)u-qivWP(}0YFD}fg!H~NzoHmvck&gj-~(P`}`oF^qy=j?9*JXK6RAT zwSZ35e-cA_xxgTGZvb+*ly4A#pJZN(z};Eljj{~z zRlYL%cA^)*C9kLmJpfl)owdDTHL|y%R%r>+a5&e%r|c-DS9yP_ZcS;x^F=t0n~-%MXXm3lFisZm}l0XSY| z5vmcjlc-e@hclH4XRB%@9kLCHSdD1~!Q0Dkd{CJsZ4fyKNDpxmT0eRjA%*> za!f0-0#1x`UdHhmd4p`yzD>F^^rw2pRA=;-MRy#(g16MA;WO1MP7J&zT~a&P2OoKa zpI1raGT;rF2wL`s;^RujiCL#sJeImNI_7%SABu;R@u#fM!e<5;Lq22NWUy}w4U<|$ z|Bqg;)Sm2rgHEc+rS$74FlqY80A~;+;Ot*k16d7ZHIUUnRs&fLWHpf0Kvn};4P-Tt z)j(DQSq)r~2He*$_9X--eOUTh#vVr~Lb?a>#_JfmLwGkuI9Y*y|49Mmq2T#=d|M zB^bo#5j;p=Kz#E;#u|`zA^vNG0Kp)B^mf!E-Gg}EBF6fW_8|TtKD63P@OLn_-^th> zq}dY2Rv-)@?MD1vgbt)HA%1KrW8Fv(B7Wd5{1yVz0mKJC2s%i!yBRxu5A=t$vy`!C z5PFdAL)^Oz`UwLM;`^3ECn)z&jBp8QZ3SaH5lR5lgZS`D;3Ig%i&jAgNV^b+5nM=j zBR+jE`ish4jLj{B-jMbn{?KaZ6X`a@Kf4dO38tK}=MV-#YY_1t@kMke%H8-@y6b-Q zBg)$lpV6UnlozdK>=y`br11?=R)yd}8sF7qpFwCq8sC~^&mshn#`jg(MT98Q_$DD+ 
zfY63CzRSqm2pve{n|(HkJkN81^3j~iiiZFn58{&NkgGl4OQ1)YlIMR5Q ziOpG$7-@XxfgMH|MjG!akS(dW_eTOvOC1ooSijc;0+^iL$1Zh0u%Z?*x zNaHzYb`imWG@gNFHz7EY#`BTPi%^O*o)KkVL~tRE=Lgv<2s+Yuc9LE5AeteK=SNv3 zLJiV*wvcT@@F0!nk=R!e8j!{_c2j)T8gEZbXVNWCUAl*~V*zXW}k;c1)>`8<^Do6b6!_Ys{*}r$c1{yvq-g@08 z<1fnic^SVdu)kZ-BRVUIrYUcx{ce}Tm5aNSYw?l%vgVL~W0-yd zfxVi8vShxx<)LPOT}!=BRPUwD7m8}##^)d^6`HF(sbf_~LuO-rz>z8)VJ4~_qu{N{ejrr*sPz;C&MJ%qMY zF6lR%FgIl=pPq|333Y+JHnu%ER8yO0bA|?1-WzRd)bJZE^y?9;7A#rp#7_e>W7NYT ztXl9;^_oSc3pDW~B8~VptW^u1@P!uK`=NpvE4`tRuW2)W&;$sYL#q}H1z8rmG}t>{O$@} zS1s^{);71cJn9QB&?5e_x;l84RSULw8$-SY+8qX-m3NFzW#t_yJgvOL5Dv{(-l6n{ zjL{3(L{5w==`q(a{aDR0=ke0xuH)|G zjuXxkr6=?g+DXUBl2h6#5a|IOL`6L%J^6c!_OgBX`-=8y`-b^WxjB%_c%%2f4t`e$$jQT?8KgvJtwq{EPak7XA?oNM>3e(H z_IB*uvp2f0ZD04k!2am|j{OY>0tea-cn*^M(SvOVYYveP0*9i9+=odI4Tl4V^UseMX H*1-P-7U^^R delta 7909 zcmbtZe{@q-p1<$W*t9fl3Qb!ml-H7Alv*<7S1K(8tnO3l7!)<*`XdEeQhtX1km87q z7TtDZHES)!`>3WWAVP?-sE$*qVIOQml{oI%P0>kY7 zwK?Z|Ki~WPeSf@rUlPyjGdveWpQriY%=f?hY+KJ!l}@F1_kOPQyN`YeylY0F_hSWn zdVjCr14rX{RO#PcyV=Ej3IEt{C1lGp$z;Zx!G~1Y6e*Nuis^b%$s~dzHtPX;axhjX z_-UcAVtIHgA(x@^y9HuqCLV#5B>|lRnVP`_{NoddpT%$zVx`}RMM-e*A1uh5RoqbB z)Bwfl^=Mf5O8S+Mzv9gluuf1a$PT*dV2uSG{BRWd>hvqGmKJn5rG!2-NiRV$q#LEaZp-Dz#5z(w-UCcL` zC|HQ*@QrAawP-$((0n};O^}8El*tBB%EX+REXnK4`d?W5xlq#+NiS2oW{K|f!-Pa# z@H&wi73i}Xb7>DI#H%}Q63Ce(Es$Ri0u7Wxvj!zk-Zg0w_FOY zev$lSxl~ABOSwPe{epy1yY6Iw=BDQ7y%=CMY&4oHy_!SNS1x}0cJF9qyJmlYZb;1+ z9}3VtsV`Q%C&lZo6DLbi!%08$()}}}87DUywXIoN6dOc0UhEOZwQHV_oB7bgjM4FV z2I`#oLf+}=Sd5j4oM>2fS@u>MZPA&RhBMOsQxHzmi@GJko79vp37-z%mVR0kPrOWZ znF~bMJ+wIUfS5mb`1Q)RC|ZR4eaV(&^yp#c4Bm_XnNgl6PoLR%!%1 z3lrAS{51t>zvSItFq1vE8I)8zxv-iESt{R}3|%=FE(ey-z`86ZP1A9yMm0rMDPw8z z9HVq#bx~Ru@4W& zoonw(UisvOAQt!C3ZorBTHY|S{VZfWEzTgMX6;ClAh*9@^g)H~{j+^|DDL@VEcSHx z(1JO1cTRRX*I1cm)*mU&Svi-9o(?z6xymt_sb4!!|CCdamx-tK+Q1`C&~G|VZ^^xZ zCeKONvJfrPOB?Caxh3;7|J3@o9=9-$&(6`a@OUF9UHV^=&SEBw=djyQ4V^!yq`AUa zh<*9xUJM2s&=|q+UkV@%*%>EM5NgxO-e-*=h@vITuHDd& zh`(mb5v_m#rB)(nC234KqnX0fBuwTZ?W>3$)kDv8VxyKVA*(Cnq~-${A#M&*MZl5< 
z9TJQP*eNl?uOYyLXQzXUpTbkJ{m^V4^jbbfwn}85MUm&pUoJT|X$tf3a zCuHe+QQuh_&d*8v483@DJI~ULw1=o|L2_C(6Jlja^zxUAFtorXR+Q6k7u+J&=FpYW ztKt{gbW9o-zcA2K1;t{>K)))uMQk(BC52Cj-(=CD!h_Uqnk!nf=nm83?943OTkVxb zYh^o*iKU+`eJ^Hssgz141AW`H>c${avBmHJ*+=%iU>srC{;aY<(F3!Y@PIKy^A>%Y z%v90ZF?w;)UNK1Mf6S}Ffvnvi%a}Eb;*Z6|!C7gv#|$+q1;P#}Osttau4OA~A&p5T z;mNz%(Q1)f!V=p+R;ICHA(<4NCM}^Cq~)|FwS>CO*(EEKFcOR;vR^ZijFrH;+1ayD z{ds)5$ZQiiSYA11cw)uyh*=h>rD%>g`x0GWv_Q0H&>cmM!dt`Vi}Hn=b9nkDJ!zQ9 zc`IoYCkxfOa+M{l^sdGAF&Yf9+m#CGMl*q_Tyr>5ocl zM{xXM5;tOS?YjMqsQcn4s2g*}&PiyL%LZ#Cx5-EtpPH*nVB5)?$xb2dm}`lJH?vu{$eUT0U2hFLPsQuNvPD1p zODuNfE@mq!dsA@nB!q&pym4W(Ni`K!>ASeq@loa1sd6@chuoH>om_KDqZG=rx&DuA z6DU(^iE5MwMBe@$l5S*`%xX)@#Q5cRK2Sc?n_5n%EOYc(ER8KX@5lIlkmz7pPID!O znJc$#H0tHHN+UMl1qYjS8>?>;rv+H$re#=*Cc?{d%PnQNlMkF>{l8YsP~Q%$>W*i- zI%$6)n@tN|UvLw%g=~3TFgKHiG~}?v!fd)H{}8uyv%J$017ZLf!g5f(TrcY|leNYTj;W3h~rVH|wxS&0&2ZDNK$K+cjN zFMrmg=%veWhL7efN|q^_mT<>|A`Y+AR6m2@^`riMMs7Ya4Y z*n^E}bgI*6m)k1@WwR;+HY?jaIV4P^GE0LZ%Evan1WJ@Ns&ej67mYGpNWn9E)^a}VxD-G`ws2~V^nf~K)?nxpSd(`_qn z(+^!@1LqYWG7YcH)@q~QzaNV!Bdw-3>#f(_zYCEbpyg-FDpuOhxnz{FP19NQ+EHY_vk^S(Pv3&{tPwH~%*(#uoEzXRF4Z`dP5X zKeY_%R@^MHT6270qjQiFne2>I^AK<1^J58T_T6I@n+}OI79)^_yV#-+!X-Z3Z1zZCkZ2*Ihs>6{$@IpZP zq3ho;{Ee6{WZivF#XZYv3|pTZ%e*Ua;iV1d{~YL|jz=6q((s8#Tmmhqs>s}gciAL? 
zB&oP=_~A|Kh5QtOq^yI=AQFSHoCs+mNfYiQ0^UtC1p|GxD!w+A}?hi&$jPpm3 z9`af}WS}h2K~O!&0{J@7Mvx2ibJy_0HQx${ws~IblQqN9P(-8ixY;iQgmf^(*M5zD zAsGA+@QNdyUTs5YuLJ3urtEN@ectWM!6VH{)kmd3ua!FieQC0SY%Ena= zRTWC8TwLW*ReGaZp^vLVs%kvp72&FJRb@+fO>>pLj#ubTsLV`+iIP=SD4}w4l}A-g zB~%?;HKM94Cbhy*u9{L+?u06>KAtO=A=ctIIwO}@REf#PB{D|`N0g|1Jd5JKU8Wse z)5OtpjioJN>EoIXj=^gzLkY_;*Nk!;zsAy+u>4NdG{ncG=cv{hRxNcB*H{=Tqn5eE zk&w8##FLQtxWunYOd&1_a~$QUX1p>pQLdR*EgIuBs+Lz~M$a`8M=Qrswq`K5TpzEw z%Vg&oC&wm^gpCuH&V;3pYy2ETD)N|m)lwJXnlXlKW=vCDqJ1fH6D%~*I4R{)R-e&6X7_n;tla?!fJNiG}q|&#(jTdGBXZk+BiBms+C(3 zSv*|h=NP`m(ym(0k8;iQH5Pt8gMIV0P4TR`9Mx=kbG)v;`4+CRU1Kqth+*d%=QS2~ zHHOK}H6D&Wj$vkjlx{XZSide)kZbUhg`yed=;4-@gk_X#ra5Z&#XSh0>yTXb^T{FftQyjIA#bb^0`VxD= zByo+Eql2T5XZNYu=QnXp2S+vQcw!_YTrMhbuH%X24wY#=Tb=6xN zcU09$JF9l!cj+ZHb=B3y!zWJW&ZL%IX~Q4vS}PPZxN7U6-zROZYOJr86vHYdSan_9 z?z;M7`croa4Y_N?+Im{B$56_mU!4=~TWag8HX(6wsw9~k>h?(uyQLkqyS7VvYa3kB z&f0i->e?gGXZGAY{9TPxY_{MB>HQ${W1;xB!8?IJfNbC`cqO`M zz|98Uofq8TA$^qUE{2fROn<-%FTtdc}9C&XYX(rR?`11(4 z1R4jg$B*xKfUIzE0#AV?@T0(t0yu(~3JKW>vVg~X2I&O3!Q;h(`~lPi9&ZvPc_H-R z@tQ))K&{~Mu0ZyJeBeC`n~@xy4s`IQKt2Ka!Q=iXq7nZzfY$>{Kq2tB{K;0(2zcD~ zpFvviIO>TRqz8|ygxn3v1&_;uJPVS*<1iyZ(5w}*vB4oVR9< zw&8!Clgm*(cnN<2y#k7W4+HPCVhX{BfVNT;0NxGkxf$yRo~*(OfyTglfK6qXDI^F3 zGs_VVyajj$lnZ_eczHFJ2E46#4c@*%R&+eT=WVDO{5bISwMfJqfFG{I(ty|B0$Bwd z!H)uOzZH7|yc_u3ZI}`8L=uz%+Gr;O>`cw;yGf5Q&Des@mPJSmX<0=``p3d8P? 
z4<GgY^lSTb`@{W_{?-BCfPK(0 zm^&m5jSoeKriO?Qdzj6QKj;tnNBj}LJJ1yH1X=^WfVI=sDRKI}LVv7M}zuoWfJ27MWKyE<7oQ(y>1JS?~=FQ((80-vnj$ra!UG6SVm!;d< XZR?gXfz#cjN86+C$?d`Y=oJ44aBk?6 diff --git a/bin/mimalloc-redirect.lib b/bin/mimalloc-redirect.lib index 1e22ef12aaec87c9544724a5c766ddbc9bd90ac5..550db8ec56373737c44eaf236727085670796ba1 100644 GIT binary patch delta 104 zcmdlbwo7b6m>A~?1_lu3a)RPLKzh%_oR-N!jH;7gGx1N>VN{#=L3eUF<8=KKoD2+3 pTnr3*fH(lizQMu3z{|kEz{tSFz`?-Iz{0@Fz=mLLwq^Ru0RRTM6hZ(1 delta 112 zcmdlbwo7b6m>Aa{1_m&80@6Utc>;td=Cn+fV`Q5gz^Fd?H5312FJ|G%=8VDh!4D2(PEn_i((&rM#Dquqc+eq(D!H?F;+yZ5m9fPs6$OxqaDoq_PO^$u*_w4_@&p!K{o6yp#ZaJ>r*Ag;&AHEvY485$ttjbSLY-96JUT((ovn}EYKYI?H z2(ah0m!;>YY+hco(MPhJ{Nv#{u2hx8ok;%EIt4etsnaK_ChNHpU_=Q!L64`NTflLZ z457z_35B_Q#hE&2W zNWrj_roWq>iSYn&zd*w)@NY2jb_}r@815$FBTy)qryLPXiH0;krx!Ia6YkN~P7ps% zpf}A~J`OofcrnQ+v?OXwLkm^<-WKs^$Yt0?6D~0Am(ap(evM0h^Tr8dM8qP0{fYJ|6B?wej(YJMM7n8z)67v2kG^$3>HR59pyoMxM*mA9_U>ekLB%4$Kdw+cv{2|A=bcV0i}_d^ zKOAunJKU*_89U*+uvqKLlSm_bT0er$R`*wt6o#o7iSV%m9(YpNx`(KdidaI$$R7Ty zRzxXuXuWn~<=|n1I=YDYv&XG%WfGE_@EKJb)5nN%){v0tf@l2ZnXA7X9YumAugW47 zx^l$n5&!r_SO54&Rq7W;gmn`$=ekCSON(rUo8w$yUW6ae&PE z4oO!>v)kReHg}w8q|2xA3l}DBM;f~Bo*YxD{DM<|aUyxG__Fi%BB6N7<3enT0o25) zg^PC%?Z9fD?O?hYr>+L z))|D^eIbyla33XF)-|DVrek_Abc>>sM7kUk&dt0nHC^HFC*)6KEbQIg0U=>lR;7CY zYmkl6M!PGH+3Hl_biTgbsNaf0n-t=eg)Oyigc>+?QM3qYx**=AIpB2C3(ed}odvy2j0`^x((dIK)g}sR^Rlf@%SF$P}sk&eGi@9}N zaqBqg3^yNVZgnaKI+B5_cIO+Q&_7K-I72Xe>B{DlnZEKF?1bynG2!o5Srx6f^IQ3L=k9u z6Zb-;D!K^F>fm%=E7AHT8n!CA9G&FrC+w>dHu(e-L7^r4VuXK1!h=oUi0P9G+a%Hq z!_&iaWxvsmb>*WeER-Higg+elVBHxV*QGMAQVIUFSqt>h(b0%o+otLu*>%4;&weT* zLsD>@m=+a&kv3~Q3Czn70_?{qxphJUQ?FPcM93O_W6fs9_Q2fhBEJJPlP`G)B z3y!mP4^mY`D)eZ9Lmsh)(a}F+;pUh`-v>HVrH!}^=<01#utHKBZgY!~3NgYDCoFfi z@mC}6tKrkBZSH|rv~-O*eT9qg12{txO7)d(?vY5vNVtFtEJsfcH|e-QeuQwt31#}W zia4Eed0b!?4!Ot_M@1}{2uw7U9e!x~@#vqy9X@SsaU@oHr+5hnk9JVPg%meW$VzKIQSyCd}{)zwSZzuY_Ax= zs)!znOn7Ym8=;*;PAKt-RFci_%N|K9l{h#DBpkyYaiKzCnFuBhCV+w+qeb1q&7on4 
zlS4ysyhJL7%6&Srd;AJ1#Ma`e;bO5zu)Mf_VAalGg|N&av5mhNabLuliEt<`Y(Fiw zAGBVjDQUf`$4i92+Q#=IL5tn}s0ZtXO>GtDPtu{dsg3_bdwUOBV4J(I7KJwuK9f=2 z&Y$;)8&auA2X4vfWg}c8wU@I4TrqmfWsf9{6MeYc=Oo9kzekRP7LRDzaXw5piHc!% z13){8JNS#qyURyMM+J6(FUZvfaZT`u`)3HD1vRP@!r+2mTc)C_Sy_%pF@+!0aJV4M z!Bs1=Tu0>Qqjv8zceL%T6K+)Nd~6fn6mrb5SUQ%6_fQjoZ9R&64+ z2_Np(dU3iWewNqax{6dG)w;G4-A!AOG7Viw!rgk0_=|THTbV*fhPG&o6)RcE=Ymsj zT!wO?4@(%GS*ro9Ow2{hm`i?>6)k+8iIPtG&evVO69TZycA{=EAICmo6);SmYseWmb#MTdsiR>Id{T>!G-lmy=`ZmHuV#q``Sc|D+rAfCklth_Wc%5`% zSF-|5-hCIyzD}fu0uf99%q-z%vaT2HqnCegw3ok9(aXP&_p3v_{0De`i1|mlUS7oe zpu3mUDks3N$k)L?@3^bpOD|QDjB2&JTWD>hxy!4J@aziiQ z2bHhEU*&Rdil3Ny9D!ZCdkxROK>z0mZ+KlTHe(!n z1%3K)$p$mwj)zZ=FB3ohyAmB5EDj+dan5Ws$$N$3shSj&nE_=?HH<`jaG6-?V`GNU zovsZ@Jqo2Y%)T^+H@k;-;7ABNC*bH_iPhQ65AR6e(Oj|dmF}i#p1IjQl1jDnO?pwsI!3E+CdQy42!pz5P)`d`p=&GCLf}6@MgidiiEGD)S#2?z3mu}Ga&f{%@{7L9=Cb^0s7%sP zF~0NEGd+x~!?C-S>1Vr5eClsP!{VHbWzt6<$x$cO(%Ft;^xV|G<4xgz7MBUeY}YtV z5bExJQ>f0qcg7(Uii6iO*XgE=Fxw^_S|mr41ll&$i9FfmTOF zI^wt8E|Eue2zM`eD5R4QMtPgi*Sowa!p*~6umC-kZLB?9U@1u0krBhra-V5CoLcb) zr9gWVEk4gM^vxT6naNM_l@RB{5*}BayJ*>Q#VQluL4}5$)a-H2blAc5c0bd{z9Q&@ zD>xgM00Rmt+2gb^u5R0WYl)2RBbvqt`bf}E6zWM+6v3ULo02YH6H|ocAlI|6lD6IR z7lazq0`<=>2#=YZ=*3Cs#Yq!4;n!T)f*x_5Rw$dAGI5nMcS-Zxl=&QKo}|pLk7N8F zvepUtccgi*GXK>$;TrTEQRdG`^hcC=^EkmgEoI^+Wxh_LuTthNX?~kBpCiqal=<}} z$v%D&$NYasn)fR6UrF;L%KVw6t|^9>G$j!p>$m1hN|)T4q$*06P|*d^^swtN_97RU zPP|fU)8ea&kWo!kZSAISD&eP^6kLK5*XX%mQl~0f5?#eqi(iaUjnf_l`hnZMIZQw&i(`Sps27n=By@tcnd)px8_C3c;@;{~-4uPICC?^(9IlwV)Yue`II zU$F+3IFn=5veJ8b$33eZt5>ZOme;Ndx$kr@D=l5UV!dbCniVDM?{s@sm#?{(>JP_y*` zqrWP!(MK;Ef&nAA8okCUFx5P;wZ6eyZOp0;dX0g4VAsj1|A(k)(%f3{-8c zu4Spg+P<-=!Ry5zE}jf+ z4ZtNiqvRMxRkO8*ysvqviULET>#KuxUMzyKzMUZIKTA2rq1(($)99Y?X==zGyJs?%i6op^p5$k@;JSiq^#6;N9lk z8W0vXF1xM4+gM+>4ePeWj|3a%XEvs1HqJNJY$a!_0>(@~NeJI<^p!Vyy;~Y-wgK!) 
zfLO1pO#yEM*e4i`S*VCWfickVAo0PC&C+TvR4Q*t{Wg?QO|^GvUCoAt!t{Vq$P47H z{g+iHxkusrzsn4ZbWGcNfYToW>`zbCm{O7)fasBtF$H-M zh?tDj7s<|Re#q&K02Zc#v;pCfksbx<17eqq^ef0iK%A11`xN9wAO-+PSwK64mtkoV zoc1KA*8&UxsziS=WAYiYfG`2_WP(W{lmOuYtd$A-6oMa!9e`Gu(4r6y0dW{`TqbnL z1Z@ux{Q%~bh?fy%qC3dx(*TT;g_k^eVpxfnHGoRMEe;2OXaTg!gc}YY1mY;*6rc|Y z;g-oE_5*PhFaqGM$;e76fpz+eK!`H3LqW!Y&>oWXwkXI{ATnfRw~RE{fG7rdhyY(J0c6bJ1ELAA z7cc&;bc0d4_HvE&_%D9CBq;(qi~A>IuNqNJ*?Scz~z_>;-s$G%1FU1JMr{0knWn zED^4D`R_WV36%%x?yY&qTjH(qbL+R%tRFicdzb*f3J2O)z8mwe@s)d*(H){HAaQew z-haSDE=f8WyB4kfR$|w}Rf|+FHLDT`x{8I@uPqwb{e=@3il+|C{{@`-}op z!Se-AI|HtVTpJ6|7OIO%i=HWZy$DK;97mU`EYlOFBc_-s-yAl-YW|~nrsbQK`z^JW z?^&8GAvg*k8M zoXvL>=$#9kPUkGww_O`tn_Y>8lMCk;awh!02pDTHy>G6toVPq~J7UYWziS_|YjcWo z^735yPv#%Xzg&>ySnRml(d&5IG2j?=7@RYmp**MCS?>J4^GD9-of}-}U0enfHQCy2 zAKLzIyJY{|UYb*$b6?JPbE35!Y_Bn@~C9YDJ&(-5P7jj*76&J27 ztSmfL*j;$JFt5m2bYIc&qLY-u764Y*WU`r#nL16!O{Yv*=4`XcoQDNFYCdK@ZZ=vn zELj$l<&dSra@carl4?!E(jBzwZ3f$3+diAtuCwdysrFs=R{LK20XvtY$i z&0-DY53p!s`D_YdiD_`%Ovz3qj0OTmS$7 delta 6377 zcma)Ae_T}8m49y>^nn9BU@!=%gJ9GM$iNIU%r9u_+p>wXeV}7Zqb)>82E>HwU`Zki zWUv87TFmv+X1g{^8X8S&S`w@=E$NPiMRu2LNn;F6G$mbAA40a%E}KrWld#`=XC5*9 zwey*C-#zDh&yRcVz31JTjAk9Kn$zU0hc{KoXEC@L56+$K7rg6ACC<2h+Mndgo`jheBN3z~c_ zM##r=vQi6u^{qYt`7Sincm;Wf&oxqa)W|*QTFB1+3z`~GKIkq|s96^e zqnsApVX_MxoHHAblE2qvCf!&D0Aa8J9mCIQYWVh50>{m~s7I5ZiRSG@G>^mW%wCL} z3CE%F$D=7+hvwdFG3(XH;fxM{w0g>bM*)^P~jKZ#eRM(iG`~rt&WI7{VF)~zkl_>CkqS6Sv07s zfJax2Y0AphPliniPw*P03y`@K2LMJYPdjic>3T(6KckRqMY&g4%;bPSpdg zPjw|V^2(|NwwAJzm_Xh)rmy@68w_#E%7^iczoPQ5NBMIq|3Z}ip33({`NJxIAf9v< zr?1?l^6n^Js`3T#JN#$0r~CA*z5^3%>8@na>s$WN)M$TiriP{v)+e zAC+WuKvFqN7e;q9NZweNMsm|u-$ucszx8FPiY7YjWa<}KxOVUs`uuIa}r`Y5C5POV_ zvPW==Jw_28*C!3h-i}7t>5bayktY3Zlbr7%ZwR9T?#U`8D~ddOdIK-|owVhA`s8z- zPmviN+u%8DgcgSG=S1wc&y?5>pYXR$a4qp;af8s}XEVLpPPlr3!CGLNmK9hj8c6cF zL^|f^zxr0GJ`(8xT1aPZN3{WIJn+HZi#m=RPq`Qew|ieBO2+!!{u>Jm0k_b@4^aKR zuT8KIN)?+{Bd2`-1}Rvb8BZsP(0n(Dy!xIfr&07b$k6ImzKCqjH1nzC8<|JqJ2qn+ zr#zlYu4g{Wr;zTf+tL0!YYW2$^^tnS- 
zeV%Z2BiG0+r0a@^bFDSaaY>q>vkXW&D2!Gq1{B7%-`2$aCy8J81P*LGncOJsi_kHB zQUv8ds9H$RF5<-=Y0lp^#~oQtYekE;I5_J>G4Nj$dpu!ZDoTgHRm=I}FG=%syUik< z^VEpHZ60~QB!wtLr6CIUN+HzI4Nn?U3O+a87B}-EFz@ISSQ_yUbmdUNls?ZiN{%Aa zFo?_W78zcz7o`8i@lk3M$#2#-@V_Qyx&K_0f+YA`1oTT*&Fk$@EGk-~6- zWGk)(wq<|G4(&yb4oex+iZe9BN*p_d7vATnyVM?S3bL3-LUD2J`E(L3D@uYh4?q)c zWM%NvSPEHrPd<7V0I$$Z50kD+`Scp|gW|q-Ou8NzlRj7&lm6%#lWw4|%#BGic-{m* zs~eN%(ErdqCjFN%CjDt|qYmz?`=sl#k`kTa(e_|VYAnl2!J^-pDY~8_b&32bfkl`1 zk4cwd@-4W$0vB(iKLwY+1OHCTnDnkVCS8T>8nE}^{`crl!~Oe0V-lTOpZju^@_l9v zMG~Gp?SlVeQnfN1QKvjx>&>QqvCkF#qTlUfQV6#G3_pK??f*u99)AA^&yQgLub5tV zZ(}@Nh(1rKS~1@+{n_|{G>=;Z-opFc z^K}7_=yxP=$z9(;Qua&pb;~3is&MRHgHxq#TfN^M(FZ(X|B(be=UWK}Og%FDK7)4z z++nu6bnILze}{_|Hgb;?^0$RJpJ~4b+ETbU;12a{7h#CLtHXgxy7NPHQ$mdz@H%?1 zk^kXcnyZYN-PmyCn`_DtdYY#0UGj^~cO2-V14K4J@b-g2D6*0f6(pYG|Y4r14b0Ny5ujpctC)6+)v(wW{6)r*hXBQeU!edu)$~& zp8drgeEY{NUzm?u=-bKbqX&i_)fGeh%vFE^+)Jvoc8`up{tXfwTdrj_%^wOv3pPA1 zF1ns7EfjtV_>vT+H|0LucN#0`xr-nx%G?wUnO=9+(4`e(^fQln zJqcP>O8&>hlV6&9`QumyBCHx{Y28KY49Z@)3N469` z+Ad&vFoapMGETQ)Y#-6VEw*Sj{ElAES4b!7)&=yP)n02ag37&e%S5PHX<-r+;_&Rt zOh3gd(~&!(a@?=C#&%Yf@+vBff88i8hNzY%jQZ7E%wF=jAks=NT^SVIQ3 zCboA*arFv{`WTt{0I_?Iv7#YXD|OKkP|x$8gwT7MBJ^0Jd|>>3)N$Rj`<0G{?d0AeJfIG1%C`Q0P^5E;sCXR+CUAUi;!Oi{SovDD6x9@Y4seEFqL<>~id-}ox2`|2KEJDk&5u6eS$dAR%Vh<145v7K=l%{83Z1%j`| z&%j#z(7T}7p3x6rU$-rzz!9h10dj*yqG(522?N`fOQEN zR6!X)(-Lr61-k$YE&(}1be=*Gz&Hp?v`4s%&ea&BSI`N8A|M*kbW}ywi~Be+17rYE zU_b?mAh3eA#Q-jc*_HLXA*cg2$AFNk(FMUV(8(Ae#x#U85R8CWSd-{XtIRw)+PWAb zqGO0L`h0X8pi0me0^S3G7t|aB76a~ppdT~{sz6N5YQQ59On|0A%>V{eAPj-Fo)h&T z0{~eKI3EHBhz6{e(WwKqf@mZad9g&CfM5`GCI&15T+91!dTF@H_ybb(HSsJ|N3Ul4+;pl}Rmjsar!gPgbtL`P3X=de?>LQokKitCx} zo#Gw{yfI-&9oY&&S4>zG8>v45!2oCo)C}Qd6p;1f5KMvQKr)0wDxmdZRv-hYAHq-! 
z5KRzNf@pX}m^Nm)Q(pr?E9eABhOk4OEL6ab=v6$6q2e<&DB-p!)iO}-yxAd@30gd ztgmZ0SijHL@bE*12WlId>gx;z1%{SG4G%qNI9UH+OH=JZ!-3kSrib@!ec)hyeIaUV z|8k;ubi=K?@_BOlo;686Z^I#sdBm`mvQS}QR1a}sjA@{Z@E|9@16$9p@M#eIqA z+W6rMn)Uo}^LI~Z+hrL0va!q9Ydmgz);MHLELm5wu_V95RN^SPv*c$b>88yluj!kn zM@>DZ@0y-A#hFvhS>_z`9poO6j%I>!tIh36?BNj>T!&W%;`0QA?NQc)R6GOPsOWBzHLf2XPd3&X0yk#*ZQ(`!Wt>N-FCNa%=WHr+O}ZR z+gIDIcDKFS{+RtA?e{w-9KUr;Io@+zcieP@93MI8pT+oc9kwpp3EOp>Vhh_U?d{v` zd+cZIL-wFubm$#<4xgjd(GA6`jyZ?JS>cqO1I|I`S*O+|xH4Q`SCi|AYr-|@nsybH zo65J9pD2H-{B(J^JW|dz)5YL8UD2MRnxbIQXi=VVlQG|DGIkq}8T*Y-8Rv{4K3jagy?CT}v^b+AyW~hoN6B=_^^$F-O4Di68PkyIyh&#k&3eqb+3Yj7 znva+}%$Lnm=Bwsu^L4Ye)KOYdx~;Ub^knHkDQD4I1dC{?vv@5{7N2F@GGUptOj+`+ zMOKs5YIRuqttYJW*042V)t2op+f!Ck<}Dj3YY&!B051_lu3DuUv7K>FRpoX*M7jOvq^`6nAQYD|95#5S3qnQQVfE`A`* zK3STPbMjP1fyt8?yY#b%7 delta 121 zcmew$_Cahym>AbP1_m%L0@6Ut`2~b0=5$Ve&%`#FpP6g&F)n@}%|2O}(P6R&v*=_l zMxn`583iU!V(ij?$Hl-<1jJuB85k;n_zwpI11|#u10w?y0|x^;0~-S?0}Fz+*_%m~ F698;87}@{; From 9a5189aa834a62a8ee96867693d622ab0d6d158e Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 15:42:22 -0800 Subject: [PATCH 100/293] add vs2019 project filters --- ide/vs2019/mimalloc-override.vcxproj.filters | 72 +++++++++++++++++++ ide/vs2019/mimalloc.vcxproj.filters | 75 ++++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 ide/vs2019/mimalloc-override.vcxproj.filters create mode 100644 ide/vs2019/mimalloc.vcxproj.filters diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters new file mode 100644 index 00000000..bc1e4c60 --- /dev/null +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -0,0 +1,72 @@ + + + + + Header Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header 
Files + + + Header Files + + + + + {f1fccf27-17b9-42dd-ba51-6070baff85c6} + + + {39cb7e38-69d0-43fb-8406-6a0f7cefc3b4} + + + \ No newline at end of file diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters new file mode 100644 index 00000000..b2282df3 --- /dev/null +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -0,0 +1,75 @@ + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + {2b556b10-f559-4b2d-896e-142652adbf0c} + + + {852a14ae-6dde-4e95-8077-ca705e97e5af} + + + \ No newline at end of file From 5c8721f0b80f6f5218f37b102f50c31ddbdda7a7 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 15:59:54 -0800 Subject: [PATCH 101/293] update documentation --- readme.md | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/readme.md b/readme.md index 0d11db16..44f62230 100644 --- a/readme.md +++ b/readme.md @@ -56,6 +56,7 @@ Enjoy! ### Releases +* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode. * 2019-10-07, `v1.1.0`: stable release 1.1. * 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. * 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements. @@ -138,6 +139,10 @@ target_link_libraries(myapp PUBLIC mimalloc-static) ``` to link with the static library. See `test\CMakeLists.txt` for an example. +For best performance in C++ programs, it is also recommended to override the +global `new` and `delete` operators. 
For convenience, mimalloc provides +[mimalloc-new-delete.h](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in +a single(!) source file in your project. You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`) and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version): @@ -188,18 +193,18 @@ or via environment variables. - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. - `MIMALLOC_VERBOSE=1`: show verbose messages. - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages. -- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages when available; for some workloads this can significantly +- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages when available; for some workloads this can significantly improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that can have fragmented memory. - `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions - show in the working set even though usually just a small part is committed to physical memory. This is why it - turned off by default on Windows as it looks not good in the task manager. However, in reality it is always better + show in the working set even though usually just a small part is committed to physical memory. This is why it + turned off by default on Windows as it looks not good in the task manager. However, in reality it is always better to turn it on as it improves performance and has no other drawbacks. - `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. 
This reserves the huge pages at - startup and can give quite a performance improvement on long running workloads. Usually it is better to not use - `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving + startup and can give quite a performance improvement on long running workloads. Usually it is better to not use + `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented. Still experimental. [linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5 @@ -211,7 +216,7 @@ Overriding the standard `malloc` can be done either _dynamically_ or _statically ## Dynamic override -This is the recommended way to override the standard malloc interface. +This is the recommended way to override the standard malloc interface. ### Linux, BSD @@ -244,29 +249,29 @@ resolved to the _mimalloc_ library. Note that certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). -Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is actively worked on to fix this +Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is actively worked on to fix this (see issue [`#50`](https://github.com/microsoft/mimalloc/issues/50)). ### Windows On Windows you need to link your program explicitly with the mimalloc -DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). 
-Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available -in the same folder as the mimalloc DLL at runtime (as it as referred to by the mimalloc DLL). -The redirection DLL's ensure all calls to the C runtime malloc API get redirected to mimalloc. +DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). +Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available +in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). +The redirection DLL ensures that all calls to the C runtime malloc API get redirected to +mimalloc (in `mimalloc-override.dll`). To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some -call to the mimalloc API in the `main` function, like `mi_version()` +call to the mimalloc API in the `main` function, like `mi_version()` (or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project for an example on how to use this. The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic -overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc successfully redirected. +overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected. -(Note: in principle, it should be possible to patch existing executables -that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the mimalloc DLL into -the import table (and putting `mimalloc-redirect.dll` in the same folder) -Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)). 
+(Note: in principle, it is possible to patch existing executables +that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the `mimalloc-override.dll` into the import table (and putting `mimalloc-redirect.dll` in the same folder) +Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)). ## Static override @@ -282,6 +287,12 @@ object file. For example: > gcc -o myprogram mimalloc-override.o myfile1.c ... ``` +Another way to override statically that works on all platforms, is to +link statically to mimalloc (as shown in the introduction) and include a +header file in each source file that re-defines `malloc` etc. to `mi_malloc`. +This is provided by [`mimalloc-override.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-override.h). This only works reliably though if all sources are +under your control or otherwise mixing of pointers from different heaps may occur! + # Performance From b820009df733afdd933cc70d29392593da837466 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:09:34 -0800 Subject: [PATCH 102/293] update documentation --- test/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/readme.md b/test/readme.md index b74364ff..db3524cd 100644 --- a/test/readme.md +++ b/test/readme.md @@ -1,7 +1,7 @@ Testing allocators is difficult as bugs may only surface after particular allocation patterns. The main approach to testing _mimalloc_ is therefore to have extensive internal invariant checking (see `page_is_valid` in `page.c` -for example), which is enabled in debug mode with `-DMI_CHECK_FULL=ON`. +for example), which is enabled in debug mode with `-DMI_DEBUG_FULL=ON`. The main testing strategy is then to run [`mimalloc-bench`][bench] using full invariant checking to catch any potential problems over a wide range of intensive allocation benchmarks and programs. 
From d55ab50a84250e335337724b6e002fd349e35226 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:10:04 -0800 Subject: [PATCH 103/293] update version to 1.2 --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- test/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 03316948..9d78b5a0 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,5 +1,5 @@ set(mi_version_major 1) -set(mi_version_minor 1) +set(mi_version_minor 2) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index bc817f54..7f26896c 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 110 // major + 2 digits minor +#define MI_MALLOC_VERSION 120 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8bf36521..a80dde58 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 1.0 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +find_package(mimalloc 1.2 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") # overriding with a dynamic library From 7586225fc5c6327e4b16a0abd2b4d75c37e497f6 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:10:21 -0800 Subject: [PATCH 104/293] add secure build to azure pipeline --- azure-pipelines.yml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 79228c41..41d67f86 100644 --- 
a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -35,22 +35,32 @@ jobs: CC: gcc CXX: g++ BuildType: debug - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_CHECK_FULL=ON + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON Release: CC: gcc CXX: g++ BuildType: release cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + Secure: + CC: gcc + CXX: g++ + BuildType: secure + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON Debug Clang: CC: clang CXX: clang++ BuildType: debug-clang - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_CHECK_FULL=ON + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON Release Clang: CC: clang CXX: clang++ BuildType: release-clang cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + Secure Clang: + CC: clang + CXX: clang++ + BuildType: secure-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON steps: - task: CMake@1 From c6c24f9c2efb793a201e531057f25ea914792d3c Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:17:47 -0800 Subject: [PATCH 105/293] update documentation --- readme.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/readme.md b/readme.md index 44f62230..e5a870b8 100644 --- a/readme.md +++ b/readme.md @@ -37,7 +37,7 @@ Notable aspects of the design include: programs. - __secure__: _mimalloc_ can be built in secure mode, adding guard pages, randomized allocation, encrypted free lists, etc. to protect against various - heap vulnerabilities. The performance penalty is only around 3% on average + heap vulnerabilities. The performance penalty is usually around 10% on average over our benchmarks. - __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions. A heap can be destroyed at once instead of deallocating each object separately. @@ -65,7 +65,7 @@ Enjoy! ## Windows -Open `ide/vs2017/mimalloc.sln` in Visual Studio 2017 and build. +Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build (or `ide/vs2017/mimalloc.sln`). 
The `mimalloc` project builds a static library (in `out/msvc-x64`), while the `mimalloc-override` project builds a DLL for overriding malloc in the entire program. @@ -98,7 +98,7 @@ maintains detailed statistics as: This will name the shared library as `libmimalloc-debug.so`. Finally, you can build a _secure_ version that uses guard pages, encrypted -free lists, etc, as: +free lists, etc., as: ``` > mkdir -p out/secure > cd out/secure @@ -141,8 +141,7 @@ to link with the static library. See `test\CMakeLists.txt` for an example. For best performance in C++ programs, it is also recommended to override the global `new` and `delete` operators. For convience, mimalloc provides -[mimalloc-new-delete.h](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in -a single(!) source file in your project. +[mimalloc-new-delete.h](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project. You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`) and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version): @@ -264,7 +263,9 @@ mimalloc (in `mimalloc-override.dll`). To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the `main` function, like `mi_version()` (or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project -for an example on how to use this. +for an example on how to use this. For best performance on Windows with C++, it +is highly recommended to also override the `new`/`delete` operations (as described +in the introduction). The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected. 
From f27c87c03cac0b5344c5f715377478375e145b3f Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:19:47 -0800 Subject: [PATCH 106/293] update documentation --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index e5a870b8..f68d79a1 100644 --- a/readme.md +++ b/readme.md @@ -56,7 +56,7 @@ Enjoy! ### Releases -* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode. +* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). * 2019-10-07, `v1.1.0`: stable release 1.1. * 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. * 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements. From b7d9ee8830f1e77eba002f26ac65f498e5cce0e6 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:20:35 -0800 Subject: [PATCH 107/293] azure pipeline logo shows dev branch --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index f68d79a1..feee8704 100644 --- a/readme.md +++ b/readme.md @@ -1,7 +1,7 @@ -[](https://dev.azure.com/Daan0324/mimalloc/_build?definitionId=1&_a=summary) +[](https://dev.azure.com/Daan0324/mimalloc/_build?definitionId=1&_a=summary) # mimalloc From 42a0666770688c4c39197320712e9d5c9bcc9dd7 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:24:10 -0800 Subject: [PATCH 108/293] update documentation --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index feee8704..9d3974c9 100644 --- a/readme.md +++ b/readme.md @@ -56,7 +56,7 @@ Enjoy! ### Releases -* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). +* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). 
Improved dynamic overriding on Windows. * 2019-10-07, `v1.1.0`: stable release 1.1. * 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. * 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements. From 41caf6d0f8487ff856f6a10adf4a7fb016df9341 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 21 Nov 2019 16:29:46 -0800 Subject: [PATCH 109/293] set secure default to 0 --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 9c5d3c19..96e1860f 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 4 +#define MI_SECURE 0 #endif // Define MI_DEBUG for debug mode From acb03c54971c4b0a43a6d17ea55a9d5feb88972f Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 22 Nov 2019 08:57:51 -0800 Subject: [PATCH 110/293] update redirection module on Windows for improved trace messages --- bin/mimalloc-redirect.dll | Bin 55808 -> 55808 bytes bin/mimalloc-redirect32.dll | Bin 39424 -> 39424 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/bin/mimalloc-redirect.dll b/bin/mimalloc-redirect.dll index 77e40c1391dc4e3f6fa08c30c1fe461df1b0fa80..b7bf1d099681fc73d15ee8dd1b86a48de0aca1d1 100644 GIT binary patch delta 748 zcmZqJ!rZWhdBP3m&q6mg-ezTFtls>PF-(-PezJwQ^5i+<@{_B?l|*zHfZ#R9aTgUG zMj(rS8;~)1gSefp0f-BhFaRlOu2C^yDCGjGGXZhI5+*PmCX=Nl3`H#95*9ECi^&NR zn$|XOqijG*fJWIsjB=x|X{1i3=7^#FhCxyeT)lto0HDuwCvTM$)D3~_41wwlfjB7wt}6njD`N6z zNfnV8xI_$0A_i<+gp{Ce0^GO+sBsAp<5J+dQee7LCQp-66Ul%}WWXdcCSQ=U)6Ib! 
zl>;>@2VztKTt@*+N5N!k={1b2CclvOuU7#Dno{e55@}#Cs4$d>c=YlLMlmqFIRE_r ze_*t!KuuF&DCIoHy3B`>f$_B_NGVgP7*MGjNa+%Y(h{&uJV4O~kZ2!Jlz$s*2~Z2r zcs`(TCrJ1VQ1~@S(QFV;ITNDFAEHX!quJIQ$Ydzt?e0-|0c89awb^`Grk!zff}FJ7 z=GvHcMnwh&VA6~QS;hp!=|JpK2*mLq5wP55ifk~K5PLR;FwtyRD z0a5}q$^v4P4P1u}OovT}i;5!1OvTm%{H-S@ACXX=yhuWP@;?b>T^G1HE>Lq^Am(_$ z&GCSl<1yJ!Qcc7MF5v@{@R_^;=*z8=g1P~4;{u?@1wf1of$Iu^=?a1!y84P`Do?d diff --git a/bin/mimalloc-redirect32.dll b/bin/mimalloc-redirect32.dll index 9e5fa86580c786c30acf7e28a84f6ce516fe943a..7ba303af50079c0f0fed334190abf896674d9806 100644 GIT binary patch delta 2091 zcmYLJYfMvT7(QQZ1xHy=xuXb$Rc!Eh-R33_o5KyO++YDAzThYxIim+BEr-O z2ph`EX>@uk4B;3xe@o$O1|c*Vfr&uKr@fzIUSD4y z?9(0<9I7`1$rU&q6bIL1Gz$A;xF3|P^nb0LCbV&(aV!|2Odq(n}fv(Ug+76lR z5LsIX{DleaazeY5&@Lvl$=)`}+XlH|tAmf%Rv%|HyAPwthg2}E%YGbs!nhrynoj%k zx=WQ4YpX|@$`C#gHSg%i%a^s?${yRO6g-N-7gY^&2up^O(5P^R@%7YrRW=?I7Y*bG zD0^07HJzcfrS4j#hWAnal*A7Q^6X}u?#1aOTt?F=M%TG3!)mRK*uhi=y6ozO!v653 z1l-C1@I_1{3iE+<}U$lpwRSh-TFv!wRC*xt8KE?0g zZJC`S^y{Mt;o!j(8(9U$_^ISFtctgj-{Fh+M6w8O#AlLSz$P5=yFNslSd0pX{)8S< z4f_+DU-|7P%0}Hg@&a<=its#2Xh{ktd!Qr9kZce|f4+VTK8bD2;k8PHrn46xrO1^x zmdo!$&FcrRCdZI;n3fzKIz#fTC*G^KXtr`a$h`J|+4I^CEZh5KFT%Ja|VkE6?52(5;r z_X?pUp@P1l=KRG8fXX=9B^n6PU{x(<84-)@&ObHIh8XP&4VHnhe*A}FW zW1aUO)t%2H^I9+evE%MHw=N3BIgv_|F1)iS$PX{DX?c~>V`W%4uUHCvz79XYrser? 
z2P>Ue6&C2oPN*rUB8B37=?Wcb5vu{LJYwZosUvRKw6Y4jiwg50dll`bRX0{yVwF{> z!-S&3Domrwj3SzYmC#Y-Pry)I3W;l2G^Z7pz%i^^#7bApLdTj?I8e;u1EF>_Vy!}S z?|$n)_g-GD^2}yf7PS0U6`RGdSy+|8DDrOPeY7obUNgfwaL~&sgqn3#xZ~%D2H3v- z8J^>p4JF_yp^<-#P;lDr@Mh=vnEr7z4v(<571R9;K~KA4)%x(V4-S9-78X5oU)udxkyp zv&^o{k(pr@izVN3!!m5~Su~uUOX9M*72Imh#hu{Jaus?jlf^K4464sKnDI0tz;xd< a&Rk$#ZRX7z&6Vc2%{$HS3dRq=Q~nFY_WP6o delta 2068 zcmYLJZA?>V6n@{X9jIl!g#s2Ulv*ty8^zK}N6T=onHh9eHWH^yL2$cL6x5(*5wlB94am-n6ZUzxS){OWzN(=GUmtTrs&Rl>&lwnCLbtX-7q^YZN*(M)ioqj)-i@2wc+GO~aDI zaKvF)bQqGfLlNx|q{kc70}?wRvhD#WjklZnC5QfqL%-2fI8Gky$PsZ@vb}T=TWC=0M8sD4{aJFJbd@;=>%3|Sg;%eHiO`;sk z)9R@X99jeY2i9xX(?h~JZ6>8em`eJd(p&JgE`@Fq#&r%E&4#%tKrcXdN(=RYUH=!Y z2Xm^Hn&72W8|{IH)LuFx`qD&qo`%^rj7Oda*W%b7KFH=NDvpoPLKPamO+mtSVm z=w{cwW;fTYX($eNiH!Um*PY2P1NC=Eotj^<6JNC-bXh4+9<^dR4$HDqXg`!^mD0E2 zY*s4W1feV~eF^@}N`BQmDq1%7*BN=fj&OV}0=u-ZI@0lk8}5pdcU%u9o%}A=c5$BY z-#`67*u(}~S;wth6&{0XbK3l~k-n<8AH8+B-c`*d(Zzfxs=~GKh4~o$14^2&{KH?-CoR_Tzeyv_UEfv?->hX(gTeB9* zO7*lB8cO|m=&WA-x}LU**(hcsV&++=r$N}d&QGVsdc}G+U0${p1~4;=SwWc|9m>o6 z=*Bv^<;({&p{G2WBAH$3g|rF|WA@<9cTh_3&2JecZ(t+r1F- zGRr@`e(-s#VXRuIIQl%{f@1I0%`Lb$rYhRPL1=R|3~f=;GFVkp6EpTPUKQbEwG49;)$s`*a{w+o{MBeaZg1&oE@R;5CZntfR9#5v^J%nIn* zHb)UcnuA|9d%?I}I-jH4|6d+%dpY6sqT)lJ5;uVhZ8OW&o+E^-#%RItpu{XnTI-a! z!qmDN7?JH3ns!7d@118!w=A`|Ev1%?7M~?xIcWLFqOlsSFIrDpO}25{pgr$d&auGZ za3ncjb8c{Yor?UY@(uaKOvnn{_BQjc*(FPcm$c`c$jP)`vrbu~Z3VX6T&Mj5d$)b! 
z*(dWB=N0B%%p1=O Date: Fri, 22 Nov 2019 09:28:48 -0800 Subject: [PATCH 111/293] bump version to 1.3 for further development --- cmake/mimalloc-config-version.cmake | 2 +- ide/vs2019/mimalloc-override.vcxproj.filters | 6 ++++++ ide/vs2019/mimalloc.vcxproj.filters | 6 ++++++ include/mimalloc.h | 2 +- test/CMakeLists.txt | 2 +- 5 files changed, 15 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 9d78b5a0..f64948d3 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,5 +1,5 @@ set(mi_version_major 1) -set(mi_version_minor 2) +set(mi_version_minor 3) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index bc1e4c60..b2dea4e1 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -40,6 +40,12 @@ Source Files + + Source Files + + + Source Files + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index b2282df3..0cce0c4f 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -43,6 +43,12 @@ Source Files + + Source Files + + + Source Files + diff --git a/include/mimalloc.h b/include/mimalloc.h index f727a990..2944de89 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 120 // major + 2 digits minor +#define MI_MALLOC_VERSION 130 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a80dde58..ed204888 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 1.2 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +find_package(mimalloc 1.3 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") # overriding with a dynamic library From 0d3c195f376f32ba7de5124d19294a765aaf68f3 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 22 Nov 2019 11:28:55 -0800 Subject: [PATCH 112/293] update stress test with more documentation --- test/test-stress.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index 6b2fb8c4..b549e1b4 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -5,9 +5,14 @@ terms of the MIT license. -----------------------------------------------------------------------------*/ /* This is a stress test for the allocator, using multiple threads and - transferring objects between threads. This is not a typical workload - but uses a random linear size distribution. Timing can also depend on - (random) thread scheduling. Do not use this test as a benchmark! + transferring objects between threads. 
It tries to reflect real-world workloads: + - allocation size is distributed linearly in powers of two + - with some fraction extra large (and some extra extra large) + - the allocations are initialized and read again at free + - pointers transfer between threads + - threads are terminated and recreated with some objects surviving in between + - uses deterministic "randomness", but execution can still depend on + (random) thread scheduling. Do not use this test as a benchmark! */ #include @@ -22,13 +27,13 @@ terms of the MIT license. // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors static int SCALE = 50; // scaling factor -static int ITER = 10; // N full iterations re-creating all threads +static int ITER = 10; // N full iterations destructing and re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor static bool allow_large_objects = true; // allow very large objects? -static size_t use_one_size = 0; // use single object size of N uintptr_t? +static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? 
#ifdef USE_STD_MALLOC @@ -185,7 +190,7 @@ int main(int argc, char** argv) { long n = (strtol(argv[3], &end, 10)); if (n > 0) ITER = n; } - printf("start with %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); + printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res); @@ -204,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations: %3d\n", n + 1); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } #endif } From 4a0d35afd0714f3c8d37957d3a8b384d0591995d Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 23 Nov 2019 11:59:19 -0800 Subject: [PATCH 113/293] improve secure guard page allocation to work with non-eager commit --- src/memory.c | 4 +- src/options.c | 2 +- src/segment.c | 101 +++++++++++++++++++++++++++++++++----------------- 3 files changed, 70 insertions(+), 37 deletions(-) diff --git a/src/memory.c b/src/memory.c index 214bf0d3..b29e18f3 100644 --- a/src/memory.c +++ b/src/memory.c @@ -302,14 +302,14 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // no need to commit, but check if already fully committed *commit = mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx); } - mi_assert_internal(mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx)); + mi_assert_internal(!*commit || mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx)); // unreset reset blocks if (mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) { mi_assert_internal(!info.is_large); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx); - bool reset_zero; + bool reset_zero = false; _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); if (reset_zero) *is_zero = true; } diff --git a/src/options.c b/src/options.c
index 9b6e4cd0..8975a6d3 100644 --- a/src/options.c +++ b/src/options.c @@ -69,7 +69,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 500,UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index ffba8c0d..0b6501d8 100644 --- a/src/segment.c +++ b/src/segment.c @@ -123,10 +123,18 @@ static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t } #endif -#if (MI_DEBUG>=3) -static size_t mi_segment_pagesize(mi_segment_t* segment) { - return ((size_t)1 << segment->page_shift); +static size_t mi_segment_page_size(mi_segment_t* segment) { + if (segment->capacity > 1) { + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); + return ((size_t)1 << segment->page_shift); + } + else { + mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE); + return segment->segment_size; + } } + +#if (MI_DEBUG>=3) static bool mi_segment_is_valid(mi_segment_t* segment) { mi_assert_internal(segment != NULL); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); @@ -139,11 +147,47 @@ static bool mi_segment_is_valid(mi_segment_t* segment) { mi_assert_internal(nfree + segment->used == segment->capacity); mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0 mi_assert_internal(segment->page_kind == MI_PAGE_HUGE || - 
(mi_segment_pagesize(segment) * segment->capacity == segment->segment_size)); + (mi_segment_page_size(segment) * segment->capacity == segment->segment_size)); return true; } #endif +/* ----------------------------------------------------------- + Guard pages +----------------------------------------------------------- */ + +static void mi_segment_protect_range(void* p, size_t size, bool protect) { + if (protect) { + _mi_mem_protect(p, size); + } + else { + _mi_mem_unprotect(p, size); + } +} + +static void mi_segment_protect(mi_segment_t* segment, bool protect) { + // add/remove guard pages + if (MI_SECURE != 0) { + // in secure mode, we set up a protected page in between the segment info and the page data + const size_t os_page_size = _mi_os_page_size(); + mi_assert_internal((segment->segment_info_size - os_page_size) >= (sizeof(mi_segment_t) + ((segment->capacity - 1) * sizeof(mi_page_t)))); + mi_assert_internal(((uintptr_t)segment + segment->segment_info_size) % os_page_size == 0); + mi_segment_protect_range((uint8_t*)segment + segment->segment_info_size - os_page_size, os_page_size, protect); + if (MI_SECURE <= 1 || segment->capacity == 1) { + // and protect the last (or only) page too + mi_segment_protect_range((uint8_t*)segment + segment->segment_size - os_page_size, os_page_size, protect); + } + else { + // or protect every page + const size_t page_size = mi_segment_page_size(segment); + for (size_t i = 0; i < segment->capacity; i++) { + if (segment->pages[i].is_committed) { + mi_segment_protect_range((uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size, protect); + } + } + } + } +} /* ----------------------------------------------------------- Page reset @@ -269,15 +313,18 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_segments_track_size(-((long)segment_size),tld); if (MI_SECURE != 0) { mi_assert_internal(!segment->mem_is_fixed); - _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard 
pages are set + mi_segment_protect(segment, false); // ensure no more guard pages are set } bool fully_committed = true; bool any_reset = false; for (size_t i = 0; i < segment->capacity; i++) { - const mi_page_t* page = &segment->pages[i]; + mi_page_t* page = &segment->pages[i]; if (!page->is_committed) fully_committed = false; - if (page->is_reset) any_reset = true; + else if (page->is_reset) { + any_reset = true; + // mi_page_unreset(segment, page, 0, tld); + } } _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); } @@ -394,8 +441,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } if (MI_SECURE!=0) { mi_assert_internal(!segment->mem_is_fixed); - // TODO: should we unprotect per page? (with is_protected flag?) - _mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs + mi_segment_protect(segment, false); // reset protection if the page kind differs } } } @@ -408,7 +454,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, if (!commit) { // ensure the initial info is committed bool commit_zero = false; - _mi_mem_commit(segment, info_size, &commit_zero, tld->os); + _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; } segment->memid = memid; @@ -419,25 +465,6 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); if (!pages_still_good) { - // guard pages - if (MI_SECURE != 0) { - // in secure mode, we set up a protected page in between the segment info - // and the page data - mi_assert_internal(info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - _mi_mem_protect((uint8_t*)segment + info_size, (pre_size - info_size)); - const size_t os_page_size = _mi_os_page_size(); - if (MI_SECURE <= 1) { - // and protect the last page too - 
_mi_mem_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); - } - else { - // protect every page - for (size_t i = 0; i < capacity; i++) { - _mi_mem_protect((uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size); - } - } - } - // zero the segment info (but not the `mem` fields) ptrdiff_t ofs = offsetof(mi_segment_t, next); memset((uint8_t*)segment + ofs, 0, info_size - ofs); @@ -465,6 +492,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + + // set protection + mi_segment_protect(segment, true); //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; @@ -525,11 +555,13 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!page->is_reset); size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); page->is_committed = true; bool is_zero = false; - _mi_mem_commit(start,psize,&is_zero,tld->os); - if (is_zero) page->is_zero_init = true; + const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); + _mi_mem_commit(start,psize + gsize,&is_zero,tld->os); + if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } + if (is_zero) { page->is_zero_init = true; } } if (page->is_reset) { mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? 
@@ -759,7 +791,7 @@ static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); - if (segment == NULL) return NULL; + if (segment == NULL) return NULL; segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; @@ -773,7 +805,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld { mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld); if (segment == NULL) return NULL; - mi_assert_internal(segment->segment_size - segment->segment_info_size >= size); + mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= size); segment->used = 1; segment->thread_id = 0; // huge pages are immediately abandoned mi_page_t* page = &segment->pages[0]; @@ -800,5 +832,6 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ page = mi_segment_huge_page_alloc(block_size,tld,os_tld); } mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page))); + mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= block_size); return page; } From 41ef691292caa2417ef7e954f8eb9db2b18d1031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Sun, 1 Sep 2019 01:06:01 -0700 Subject: [PATCH 114/293] avoid deadlock with BSD systems that call malloc from the dynamic linker extend the exception used for macOS to cover also OpenBSD (tested in 6.4+) and DragonFlyBSD (tested in 5.6.2) --- include/mimalloc-internal.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 452f0b68..2ddf3f16 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -10,7 +10,8 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-types.h" -#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__)) +#if defined(MI_MALLOC_OVERRIDE) && \ + (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__)) #define MI_TLS_RECURSE_GUARD #endif @@ -221,7 +222,7 @@ extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate static inline mi_heap_t* mi_get_default_heap(void) { #ifdef MI_TLS_RECURSE_GUARD - // on some platforms, like macOS, the dynamic loader calls `malloc` + // on some BSD platforms, like macOS, the dynamic loader calls `malloc` // to initialize thread local data. To avoid recursion, we need to avoid // accessing the thread local `_mi_default_heap` until our module is loaded // and use the statically allocated main heap until that time. 
From 727d33b96f9d120d022a9de1bf8b0f39f7645c15 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 24 Nov 2019 14:40:47 -0800 Subject: [PATCH 115/293] more precise memory reset --- src/memory.c | 16 ++++++++++------ src/segment.c | 50 +++++++++++++++++++++----------------------------- 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/src/memory.c b/src/memory.c index b29e18f3..9505c98f 100644 --- a/src/memory.c +++ b/src/memory.c @@ -306,15 +306,18 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // unreset reset blocks if (mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) { + // some blocks are still reset mi_assert_internal(!info.is_large); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx); - bool reset_zero = false; - _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); - if (reset_zero) *is_zero = true; + if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed + bool reset_zero = false; + _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); + if (reset_zero) *is_zero = true; + } } mi_assert_internal(!mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)); - + #if (MI_DEBUG>=2) if (*commit) { ((uint8_t*)p)[0] = 0; } #endif @@ -409,8 +412,9 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re } // reset the blocks to reduce the working set.
- if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset) && - mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use only `option_page_reset` instead + if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset) + && (mi_option_is_enabled(mi_option_eager_commit) || + mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead { bool any_unreset; mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset); diff --git a/src/segment.c b/src/segment.c index 0b6501d8..887248b4 100644 --- a/src/segment.c +++ b/src/segment.c @@ -320,10 +320,10 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se bool any_reset = false; for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (!page->is_committed) fully_committed = false; - else if (page->is_reset) { + if (!page->is_committed) { fully_committed = false; } + if (page->is_reset) { any_reset = true; - // mi_page_unreset(segment, page, 0, tld); + if (mi_option_is_enabled(mi_option_reset_decommits)) { fully_committed = false;} } } _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); @@ -419,7 +419,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, // Initialize parameters bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager || (page_kind >= MI_PAGE_LARGE); + bool commit = eager; // || (page_kind >= MI_PAGE_LARGE); bool pages_still_good = false; bool is_zero = false; @@ -431,18 +431,23 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } else { + if (MI_SECURE!=0) { + mi_assert_internal(!segment->mem_is_fixed); + mi_segment_protect(segment, false); // reset 
protection if the page kind differs + } // different page kinds; unreset any reset pages, and unprotect // TODO: optimize cache pop to return fitting pages if possible? for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (page->is_reset) { - mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? (instead of the full page) + if (!commit && mi_option_is_enabled(mi_option_reset_decommits)) { + page->is_reset = false; + } + else { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? (instead of the full page) + } } } - if (MI_SECURE!=0) { - mi_assert_internal(!segment->mem_is_fixed); - mi_segment_protect(segment, false); // reset protection if the page kind differs - } } } else { @@ -491,7 +496,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); - _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection mi_segment_protect(segment, true); @@ -512,18 +517,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert(segment->next == NULL); mi_assert(segment->prev == NULL); _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - - // update reset memory statistics - /* - for (uint8_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (page->is_reset) { - page->is_reset = false; - mi_stat_decrease( tld->stats->reset,mi_page_size(page)); - } - } - */ - + if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache } @@ -602,7 +596,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg // reset the page memory to reduce memory pressure? 
// note: must come after setting `segment_in_use` to false - mi_page_reset(segment, page, used_size, tld); + mi_page_reset(segment, page, 0 /*used_size*/, tld); } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -792,9 +786,8 @@ static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); if (segment == NULL) return NULL; - segment->used = 1; - mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; + mi_page_t* page = mi_segment_find_free(segment, tld); + mi_assert_internal(page != NULL); #if MI_DEBUG>=2 _mi_segment_page_start(segment, page, sizeof(void*), NULL, NULL)[0] = 0; #endif @@ -806,10 +799,9 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld); if (segment == NULL) return NULL; mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= size); - segment->used = 1; segment->thread_id = 0; // huge pages are immediately abandoned - mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; + mi_page_t* page = mi_segment_find_free(segment, tld); + mi_assert_internal(page != NULL); return page; } From 4452431b6c66250776200b24465a01e03a393d0a Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 24 Nov 2019 15:25:19 -0800 Subject: [PATCH 116/293] reenable segment cache and fix initial segment commit --- src/segment.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index 887248b4..9aba8525 100644 --- a/src/segment.c +++ b/src/segment.c @@ -348,7 +348,7 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t static bool mi_segment_cache_full(mi_segments_tld_t* tld) { - if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread + // if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread size_t max_cache = mi_option_get(mi_option_segment_cache); if (tld->cache_count < max_cache && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache @@ -424,7 +424,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, bool is_zero = false; // Try to get it from our thread local cache first - mi_segment_t* segment = NULL; // mi_segment_cache_pop(segment_size, tld); + mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { pages_still_good = true; @@ -448,6 +448,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } } } + // ensure the initial info is committed + if (segment->capacity < capacity) { + bool commit_zero = false; + 
_mi_mem_commit(segment, pre_size, &commit_zero, tld->os); + if (commit_zero) is_zero = true; + } } } else { From c6df7a199c384ed0394e0e57475e6e866172b544 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 24 Nov 2019 22:00:11 -0800 Subject: [PATCH 117/293] experimental eager page commit option --- include/mimalloc.h | 1 + src/options.c | 5 +++-- src/os.c | 2 +- src/page.c | 31 ++++++++++++++----------- src/segment.c | 56 ++++++++++++++++++++++++++++------------------ 5 files changed, 57 insertions(+), 38 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 2944de89..7da7cf62 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -267,6 +267,7 @@ typedef enum mi_option_e { // the following options are experimental mi_option_eager_commit, mi_option_eager_region_commit, + mi_option_eager_page_commit, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_segment_cache, diff --git a/src/options.c b/src/options.c index 8975a6d3..bb6718be 100644 --- a/src/options.c +++ b/src/options.c @@ -56,18 +56,19 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled + { 0, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled #ifdef _WIN32 // and BSD? { 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) 
#else { 1, UNINIT, MI_OPTION(eager_region_commit) }, #endif + { 1, UNINIT, MI_OPTION(eager_page_commit) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory + { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. diff --git a/src/os.c b/src/os.c index 553d72c9..0197bafc 100644 --- a/src/os.c +++ b/src/os.c @@ -603,7 +603,7 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* // (but not for the reset version where we want commit to be conservative as well) static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) { // page align in the range, commit liberally, decommit conservative - *is_zero = false; + if (is_zero != NULL) { *is_zero = false; } size_t csize; void* start = mi_os_page_align_areax(conservative, addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) diff --git a/src/page.c b/src/page.c index 31c8fd5f..2992bf09 100644 --- a/src/page.c +++ b/src/page.c @@ -35,7 +35,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta return (mi_block_t*)((uint8_t*)page_start + (i * page->block_size)); } -static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_stats_t* stats); +static void 
mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); #if (MI_DEBUG>=3) @@ -242,7 +242,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os); if (page == NULL) return NULL; mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_page_init(heap, page, block_size, &heap->tld->stats); + mi_page_init(heap, page, block_size, heap->tld); _mi_stat_increase( &heap->tld->stats.pages, 1); if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL mi_assert_expensive(_mi_page_is_valid(page)); @@ -544,8 +544,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co // Note: we also experimented with "bump" allocation on the first // allocations but this did not speed up any benchmark (due to an // extra test in malloc? or cache effects?) -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* stats) { - UNUSED(stats); +static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { mi_assert_expensive(mi_page_is_valid_init(page)); #if (MI_SECURE<=2) mi_assert(page->free == NULL); @@ -555,8 +554,8 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st if (page->capacity >= page->reserved) return; size_t page_size; - _mi_page_start(_mi_page_segment(page), page, &page_size); - mi_stat_counter_increase(stats->pages_extended, 1); + uint8_t* page_start = _mi_page_start(_mi_page_segment(page), page, &page_size); + mi_stat_counter_increase(tld->stats.pages_extended, 1); // calculate the extend count size_t extend = page->reserved - page->capacity; @@ -572,16 +571,22 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved); mi_assert_internal(extend < (1UL<<16)); + // commit on-demand for large 
and huge pages? + if (_mi_page_segment(page)->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { + uint8_t* start = page_start + (page->capacity * page->block_size); + _mi_mem_commit(start, extend * page->block_size, NULL, &tld->os); + } + // and append the extend the free list if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) { - mi_page_free_list_extend(page, extend, stats ); + mi_page_free_list_extend(page, extend, &tld->stats ); } else { - mi_page_free_list_extend_secure(heap, page, extend, stats); + mi_page_free_list_extend_secure(heap, page, extend, &tld->stats); } // enable the new free list page->capacity += (uint16_t)extend; - mi_stat_increase(stats->page_committed, extend * page->block_size); + mi_stat_increase(tld->stats.page_committed, extend * page->block_size); // extension into zero initialized memory preserves the zero'd free list if (!page->is_zero_init) { @@ -591,7 +596,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st } // Initialize a fresh page -static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_stats_t* stats) { +static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert(segment != NULL); @@ -621,7 +626,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list - mi_page_extend_free(heap,page,stats); + mi_page_extend_free(heap,page,tld); mi_assert(mi_page_immediate_available(page)); } @@ -666,7 +671,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 2. 
Try to extend if (page->capacity < page->reserved) { - mi_page_extend_free(heap, page, &heap->tld->stats); + mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); break; } @@ -707,7 +712,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { if (page != NULL) { if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) { // in secure mode, we extend half the time to increase randomness - mi_page_extend_free(heap, page, &heap->tld->stats); + mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); } else { diff --git a/src/segment.c b/src/segment.c index 9aba8525..13bcf56a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -200,7 +200,12 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m void* start = mi_segment_raw_page_start(segment, page, &psize); page->is_reset = true; mi_assert_internal(size <= psize); - _mi_mem_reset(start, ((size == 0 || size > psize) ? psize : size), tld->os); + size_t reset_size = (size == 0 || size > psize ? psize : size); + if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { + mi_assert_internal(page->block_size > 0); + reset_size = page->capacity * page->block_size; + } + _mi_mem_reset(start, reset_size, tld->os); } static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) @@ -210,8 +215,13 @@ static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, page->is_reset = false; size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + size_t unreset_size = (size == 0 || size > psize ? 
psize : size); + if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { + mi_assert_internal(page->block_size > 0); + unreset_size = page->capacity * page->block_size; + } bool is_zero = false; - _mi_mem_unreset(start, ((size == 0 || size > psize) ? psize : size), &is_zero, tld->os); + _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); if (is_zero) page->is_zero_init = true; } @@ -414,8 +424,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t pre_size; size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); mi_assert_internal(segment_size >= required); - size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift); - + // Initialize parameters bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); @@ -554,14 +563,16 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* if (!page->is_committed) { mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!page->is_reset); - size_t psize; - uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); - page->is_committed = true; - bool is_zero = false; - const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); - _mi_mem_commit(start,psize + gsize,&is_zero,tld->os); - if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } - if (is_zero) { page->is_zero_init = true; } + if (segment->page_kind < MI_PAGE_LARGE || mi_option_is_enabled(mi_option_eager_page_commit)) { + page->is_committed = true; + size_t psize; + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + bool is_zero = false; + const size_t gsize = (MI_SECURE >= 2 ? 
_mi_os_page_size() : 0); + _mi_mem_commit(start, psize + gsize, &is_zero, tld->os); + if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } + if (is_zero) { page->is_zero_init = true; } + } } if (page->is_reset) { mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? @@ -583,26 +594,27 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); - mi_assert_internal(page->is_committed); + mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE || page->is_committed); size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); // calculate the used size from the raw (non-aligned) start of the page - size_t pre_size; - _mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); - size_t used_size = pre_size + (page->capacity * page->block_size); + //size_t pre_size; + //_mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); + //size_t used_size = pre_size + (page->capacity * page->block_size); - // zero the page data, but not the segment fields page->is_zero_init = false; - ptrdiff_t ofs = offsetof(mi_page_t,capacity); - memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); page->segment_in_use = false; - segment->used--; // reset the page memory to reduce memory pressure? 
- // note: must come after setting `segment_in_use` to false + // note: must come after setting `segment_in_use` to false but before block_size becomes 0 mi_page_reset(segment, page, 0 /*used_size*/, tld); + + // zero the page data, but not the segment fields + ptrdiff_t ofs = offsetof(mi_page_t,capacity); + memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + segment->used--; } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -713,7 +725,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_page_t* page = &segment->pages[i]; if (page->segment_in_use) { mi_assert_internal(!page->is_reset); - mi_assert_internal(page->is_committed); + mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE || page->is_committed); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); From 1643273357ac13fbe698306776d35a9d25afcb53 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 25 Nov 2019 10:11:29 -0800 Subject: [PATCH 118/293] fix unix bug in decommit size --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index 0197bafc..6cf89c99 100644 --- a/src/os.c +++ b/src/os.c @@ -632,7 +632,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #elif defined(MAP_FIXED) if (!commit) { // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) - void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); + void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); if (p != start) { err = errno; } } else { From 1d998af85432bc744275df7c9723821d947e796a Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 25 Nov 2019 10:47:17 -0800 Subject: [PATCH 119/293] clean up options; make secure work with eager_page_commit --- include/mimalloc.h | 6 +++--- src/options.c | 14 +++++++------- 
src/segment.c | 36 +++++++++++++++++++++++------------- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 7da7cf62..94d9edfc 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -267,19 +267,19 @@ typedef enum mi_option_e { // the following options are experimental mi_option_eager_commit, mi_option_eager_region_commit, - mi_option_eager_page_commit, + mi_option_reset_decommits, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_segment_cache, mi_option_page_reset, mi_option_segment_reset, - mi_option_reset_decommits, mi_option_eager_commit_delay, mi_option_reset_delay, mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, - _mi_option_last + _mi_option_last, + mi_option_eager_page_commit = mi_option_eager_commit } mi_option_t; diff --git a/src/options.c b/src/options.c index bb6718be..c8df29a8 100644 --- a/src/options.c +++ b/src/options.c @@ -56,21 +56,21 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 0, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled - #ifdef _WIN32 // and BSD? - { 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) + { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand + #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? 
+ { 0, UNINIT, MI_OPTION(eager_region_commit) }, + { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, + { 1, UNINIT, MI_OPTION(eager_region_commit) }, + { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED #endif - { 1, UNINIT, MI_OPTION(eager_page_commit) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index 13bcf56a..f6ce939b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -165,7 +165,7 @@ static void mi_segment_protect_range(void* p, size_t size, bool protect) { } } -static void mi_segment_protect(mi_segment_t* segment, bool protect) { +static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* tld) { // add/remove guard pages if (MI_SECURE != 0) { // in secure mode, we set up a protected page in between the segment info and the page data @@ -175,7 +175,13 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect) { mi_segment_protect_range((uint8_t*)segment + segment->segment_info_size - os_page_size, os_page_size, protect); if (MI_SECURE <= 1 || segment->capacity == 1) { // and protect the last (or only) page too - mi_segment_protect_range((uint8_t*)segment + segment->segment_size - os_page_size, os_page_size, protect); + mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE); + uint8_t* start = (uint8_t*)segment + segment->segment_size - os_page_size; + if (protect && !mi_option_is_enabled(mi_option_eager_page_commit)) { + // ensure secure page is committed + _mi_mem_commit(start, os_page_size, NULL, tld); + } + mi_segment_protect_range(start, os_page_size, protect); } else { // or protect every page @@ -323,19 +329,23 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_segments_track_size(-((long)segment_size),tld); if (MI_SECURE != 0) { mi_assert_internal(!segment->mem_is_fixed); - mi_segment_protect(segment, false); // ensure no more guard pages are set + mi_segment_protect(segment, false, tld->os); // ensure no more guard pages are set } - bool fully_committed = true; bool any_reset = false; + bool fully_committed = true; for (size_t i = 0; i < 
segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (!page->is_committed) { fully_committed = false; } - if (page->is_reset) { - any_reset = true; - if (mi_option_is_enabled(mi_option_reset_decommits)) { fully_committed = false;} - } + if (page->is_reset) { any_reset = true; } } + if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { + fully_committed = false; + } + if (segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { + fully_committed = false; + } + _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); } @@ -442,7 +452,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, { if (MI_SECURE!=0) { mi_assert_internal(!segment->mem_is_fixed); - mi_segment_protect(segment, false); // reset protection if the page kind differs + mi_segment_protect(segment, false, tld->os); // reset protection if the page kind differs } // different page kinds; unreset any reset pages, and unprotect // TODO: optimize cache pop to return fitting pages if possible? 
@@ -514,7 +524,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection - mi_segment_protect(segment, true); + mi_segment_protect(segment, true, tld->os); //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; @@ -563,8 +573,8 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* if (!page->is_committed) { mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!page->is_reset); + page->is_committed = true; if (segment->page_kind < MI_PAGE_LARGE || mi_option_is_enabled(mi_option_eager_page_commit)) { - page->is_committed = true; size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); bool is_zero = false; @@ -594,7 +604,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); - mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE || page->is_committed); + mi_assert_internal(page->is_committed); size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -725,7 +735,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_page_t* page = &segment->pages[i]; if (page->segment_in_use) { mi_assert_internal(!page->is_reset); - mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE || page->is_committed); + mi_assert_internal(page->is_committed); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); From a799a191360a060afc14ca686f5803bb26448e3b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 25 Nov 2019 14:30:12 -0800 Subject: [PATCH 120/293] fix non-standard line continuation 
--- include/mimalloc-internal.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index bf59656c..99e4b5ba 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -10,8 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-types.h" -#if defined(MI_MALLOC_OVERRIDE) && \ - (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__)) +#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__)) #define MI_TLS_RECURSE_GUARD #endif From a407f35c64321f02dbaf956893ced313ca7e199c Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 1 Dec 2019 00:01:14 -0800 Subject: [PATCH 121/293] add arena.c into the static override object --- src/arena.c | 52 ++++++++++++++++++++++++++-------------------------- src/static.c | 1 + 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/arena.c b/src/arena.c index 4a596b2c..90ea2b40 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,13 +7,13 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). -In contrast to the rest of mimalloc, the arenas are shared between +large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). +In contrast to the rest of mimalloc, the arenas are shared between threads and need to be accessed using atomic operations. Currently arenas are only used to for huge OS page (1GiB) reservations, otherwise it delegates to direct allocation from the OS. -In the future, we can expose an API to manually add more kinds of arenas +In the future, we can expose an API to manually add more kinds of arenas which is sometimes needed for embedded devices or shared memory for example. 
(We can also employ this with WASI or `sbrk` systems to reserve large arenas on demand and be able to reuse them efficiently). @@ -41,7 +41,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); /* ----------------------------------------------------------- Arena allocation @@ -82,13 +82,13 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0 // Use `0` as a special id for direct OS allocated memory. #define MI_MEMID_OS 0 -static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { +static size_t mi_arena_id_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { mi_assert_internal(arena_index < 0xFE); mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? 
return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); } -static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { +static void mi_arena_id_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { mi_assert_internal(memid != MI_MEMID_OS); *arena_index = (memid & 0xFF) - 1; *bitmap_index = (memid >> 8); @@ -101,7 +101,7 @@ static size_t mi_block_count_of_size(size_t size) { /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ -static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) +static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { const size_t fcount = arena->field_count; size_t idx = mi_atomic_read(&arena->search_idx); // start from last search @@ -120,15 +120,15 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena Allocation ----------------------------------------------------------- */ -static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, + bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_bitmap_index_t bitmap_index; if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; // claimed it! set the dirty bits (todo: no need for an atomic op here?) 
void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); - *memid = mi_memid_create(arena_index, bitmap_index); + *memid = mi_arena_id_create(arena_index, bitmap_index); *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); *large = arena->is_large; if (arena->is_committed) { @@ -152,19 +152,19 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n return p; } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, - bool* commit, bool* large, bool* is_zero, - size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, + bool* commit, bool* large, bool* is_zero, + size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; *is_zero = false; - + // try to allocate in an arena if the alignment is small enough // and the object is not too large or too small. - if (alignment <= MI_SEGMENT_ALIGN && - size <= MI_ARENA_MAX_OBJ_SIZE && + if (alignment <= MI_SEGMENT_ALIGN && + size <= MI_ARENA_MAX_OBJ_SIZE && size >= MI_ARENA_MIN_OBJ_SIZE) { const size_t bcount = mi_block_count_of_size(size); @@ -177,7 +177,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if (arena==NULL) break; // end reached if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? 
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { + { void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; @@ -224,7 +224,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { // allocated in an arena size_t arena_idx; size_t bitmap_idx; - mi_memid_indices(memid, &arena_idx, &bitmap_idx); + mi_arena_id_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); mi_assert_internal(arena != NULL); @@ -254,7 +254,7 @@ static bool mi_arena_add(mi_arena_t* arena) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - + uintptr_t i = mi_atomic_addu(&mi_arena_count,1); if (i >= MI_MAX_ARENAS) { mi_atomic_subu(&mi_arena_count, 1); @@ -283,10 +283,10 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec return ENOMEM; } _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); - + size_t bcount = mi_block_count_of_size(hsize); size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; - size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); + size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); @@ -294,7 +294,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec } arena->block_count = bcount; arena->field_count = fields; - arena->start = (uint8_t*)p; + arena->start = (uint8_t*)p; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; @@ -308,9 +308,9 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); + mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } - + mi_arena_add(arena); return 0; } @@ -326,7 +326,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs / numa_count) + 50; - + // reserve evenly among numa nodes for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 @@ -348,7 +348,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); + int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } diff --git a/src/static.c b/src/static.c index f1656fa9..d31fca8f 100644 --- a/src/static.c +++ b/src/static.c @@ -15,6 +15,7 @@ terms of the MIT license. A copy of the license can be found in the file // functions (on Unix's). 
#include "stats.c" #include "os.c" +#include "arena.c" #include "memory.c" #include "segment.c" #include "page.c" From 36d168a2d9880648c697761dbc6ec90211fd7b8b Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 1 Dec 2019 00:03:35 -0800 Subject: [PATCH 122/293] add preload check to options initialization --- src/options.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/options.c b/src/options.c index c8df29a8..0d3bd393 100644 --- a/src/options.c +++ b/src/options.c @@ -28,7 +28,7 @@ int mi_version(void) mi_attr_noexcept { // -------------------------------------------------------- // Options -// These can be accessed by multiple threads and may be +// These can be accessed by multiple threads and may be // concurrently initialized, but an initializing data race // is ok since they resolve to the same value. // -------------------------------------------------------- @@ -61,7 +61,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(eager_region_commit) }, { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, + { 1, UNINIT, MI_OPTION(eager_region_commit) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED #endif { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's @@ -71,7 +71,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; @@ -89,7 +89,7 @@ void _mi_options_init(void) { mi_option_desc_t* desc = &options[option]; _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } - } + } mi_max_error_count = mi_option_get(mi_option_max_errors); } @@ -98,7 +98,7 @@ long mi_option_get(mi_option_t option) { mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option if (mi_unlikely(desc->init == UNINIT)) { - mi_option_init(desc); + mi_option_init(desc); } return desc->value; } @@ -142,7 +142,7 @@ void mi_option_disable(mi_option_t option) { static void mi_out_stderr(const char* msg) { #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output + // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. 
if (!_mi_preloading()) { _cputs(msg); } #else @@ -184,7 +184,7 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { out_buf[count] = 0; out(out_buf); if (!no_more_buf) { - out_buf[count] = '\n'; // if continue with the buffer, insert a newline + out_buf[count] = '\n'; // if continue with the buffer, insert a newline } } @@ -340,7 +340,7 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) { #include static bool mi_getenv(const char* name, char* result, size_t result_size) { result[0] = 0; - size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); + size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); return (len > 0 && len < result_size); } #else @@ -366,7 +366,11 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { } } #endif -static void mi_option_init(mi_option_desc_t* desc) { +static void mi_option_init(mi_option_desc_t* desc) { + #ifndef _WIN32 + // cannot call getenv() when still initializing the C runtime. + if (_mi_preloading()) return; + #endif // Read option value from the environment char buf[64+1]; mi_strlcpy(buf, "mimalloc_", sizeof(buf)); From e31e609414d047aa198e5e59820a5f96c1a751bc Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 1 Dec 2019 01:03:39 -0800 Subject: [PATCH 123/293] add preload check in option initialization (issues #179) --- src/options.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/options.c b/src/options.c index 0bee74e0..d6b0558b 100644 --- a/src/options.c +++ b/src/options.c @@ -28,7 +28,7 @@ int mi_version(void) mi_attr_noexcept { // -------------------------------------------------------- // Options -// These can be accessed by multiple threads and may be +// These can be accessed by multiple threads and may be // concurrently initialized, but an initializing data race // is ok since they resolve to the same value. 
// -------------------------------------------------------- @@ -96,7 +96,7 @@ long mi_option_get(mi_option_t option) { mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option if (mi_unlikely(desc->init == UNINIT)) { - mi_option_init(desc); + mi_option_init(desc); } return desc->value; } @@ -140,7 +140,7 @@ void mi_option_disable(mi_option_t option) { static void mi_out_stderr(const char* msg) { #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output + // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. if (!_mi_preloading()) { _cputs(msg); } #else @@ -182,7 +182,7 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { out_buf[count] = 0; out(out_buf); if (!no_more_buf) { - out_buf[count] = '\n'; // if continue with the buffer, insert a newline + out_buf[count] = '\n'; // if continue with the buffer, insert a newline } } @@ -339,7 +339,7 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) { #include static bool mi_getenv(const char* name, char* result, size_t result_size) { result[0] = 0; - size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); + size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); return (len > 0 && len < result_size); } #else @@ -365,7 +365,11 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { } } #endif -static void mi_option_init(mi_option_desc_t* desc) { +static void mi_option_init(mi_option_desc_t* desc) { + #ifndef _WIN32 + // cannot call getenv() when still initializing the C runtime. 
+ if (_mi_preloading()) return; + #endif // Read option value from the environment char buf[64+1]; mi_strlcpy(buf, "mimalloc_", sizeof(buf)); From f9b942d80d0d51a18bcb12959b3f8f72803a981d Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 22 Dec 2019 17:08:46 -0800 Subject: [PATCH 124/293] fix compilation of region descriptor on 32-bit --- src/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index 9505c98f..3d6a22f5 100644 --- a/src/memory.c +++ b/src/memory.c @@ -79,7 +79,7 @@ typedef union mi_region_info_u { struct { bool valid; bool is_large; - int numa_node; + short numa_node; }; } mi_region_info_t; From ba87a39d9fcfab97fce28c16c7e1c799ee6af524 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 22 Dec 2019 17:07:01 -0800 Subject: [PATCH 125/293] updated random cookie generation using OS primitives and chacha20 --- CMakeLists.txt | 3 +- ide/vs2017/mimalloc-override.vcxproj | 5 +- ide/vs2017/mimalloc-override.vcxproj.filters | 3 + ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 + ide/vs2019/mimalloc-override.vcxproj | 1 + ide/vs2019/mimalloc-override.vcxproj.filters | 3 + ide/vs2019/mimalloc.vcxproj | 1 + ide/vs2019/mimalloc.vcxproj.filters | 3 + include/mimalloc-internal.h | 35 ++- include/mimalloc-types.h | 11 +- src/heap.c | 14 +- src/init.c | 77 +---- src/memory.c | 2 +- src/os.c | 8 +- src/page.c | 14 +- src/random.c | 290 +++++++++++++++++++ src/static.c | 1 + 18 files changed, 378 insertions(+), 97 deletions(-) create mode 100644 src/random.c diff --git a/CMakeLists.txt b/CMakeLists.txt index c4480b89..a894de9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ include("cmake/mimalloc-config-version.cmake") set(mi_sources src/stats.c + src/random.c src/os.c src/arena.c src/memory.c @@ -115,7 +116,7 @@ endif() # extra needed libraries if(WIN32) - list(APPEND mi_libraries psapi shell32 user32) + list(APPEND mi_libraries psapi shell32 user32 bcrypt) else() list(APPEND 
mi_libraries pthread) find_library(LIBRT rt) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 1fc70b33..821645e9 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -129,7 +129,7 @@ Default - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) + $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies) @@ -195,7 +195,7 @@ true true - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) + $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies) Default @@ -244,6 +244,7 @@ true + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 75a8e032..037fbcbb 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -73,5 +73,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 484c4db8..01c6ad27 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -229,6 +229,7 @@ true + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 598b8643..5fe74aa0 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -56,6 +56,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 49f3d213..6ac6541d 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -247,6 +247,7 @@ true + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index b2dea4e1..a8c5a5de 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -46,6 +46,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc.vcxproj 
b/ide/vs2019/mimalloc.vcxproj index bae49bab..1860f26a 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -232,6 +232,7 @@ true + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index 0cce0c4f..61de4afe 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -49,6 +49,9 @@ Source Files + + Source Files + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 99e4b5ba..e648c1ff 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -42,12 +42,17 @@ void _mi_trace_message(const char* fmt, ...); void _mi_options_init(void); void _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn; -// "init.c" +// random.c +void _mi_random_init(mi_random_ctx_t* ctx); +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); +uintptr_t _mi_random_next(mi_random_ctx_t* ctx); +uintptr_t _mi_heap_random_next(mi_heap_t* heap); +static inline uintptr_t _mi_random_shuffle(uintptr_t x); + +// init.c extern mi_stats_t _mi_stats_main; extern const mi_page_t _mi_page_empty; bool _mi_is_main_thread(void); -uintptr_t _mi_random_shuffle(uintptr_t x); -uintptr_t _mi_random_init(uintptr_t seed /* can be zero */); bool _mi_preloading(); // true while the C runtime is not ready // os.c @@ -100,7 +105,6 @@ uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD i // "heap.c" void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); -uintptr_t _mi_heap_random(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); // "stats.c" @@ -454,6 +458,29 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c #endif } +// ------------------------------------------------------------------- +// Fast "random" shuffle +// ------------------------------------------------------------------- + +static inline uintptr_t _mi_random_shuffle(uintptr_t x) { + 
mi_assert_internal(x!=0); +#if (MI_INTPTR_SIZE==8) + // by Sebastiano Vigna, see: + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9UL; + x ^= x >> 27; + x *= 0x94d049bb133111ebUL; + x ^= x >> 31; +#elif (MI_INTPTR_SIZE==4) + // by Chris Wellons, see: + x ^= x >> 16; + x *= 0x7feb352dUL; + x ^= x >> 15; + x *= 0x846ca68bUL; + x ^= x >> 16; +#endif + return x; +} // ------------------------------------------------------------------- // Optimize numa node access for the common case (= one node) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index f79c5a64..1360c125 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -76,6 +76,7 @@ terms of the MIT license. A copy of the license can be found in the file #endif #define MI_INTPTR_SIZE (1<random; - heap->random = _mi_random_shuffle(r); - return r; -} - mi_heap_t* mi_heap_new(void) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); @@ -197,12 +191,16 @@ mi_heap_t* mi_heap_new(void) { memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); - heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(bheap)) | 1; - heap->random = _mi_heap_random(bheap); + _mi_random_split(&bheap->random, &heap->random); + heap->cookie = _mi_heap_random_next(heap) | 1; heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe return heap; } +uintptr_t _mi_heap_random_next(mi_heap_t* heap) { + return _mi_random_next(&heap->random); +} + // zero out the page queues static void mi_heap_reset_pages(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_initialized(heap)); diff --git a/src/init.c b/src/init.c index d8fff823..768bc2bf 100644 --- a/src/init.c +++ b/src/init.c @@ -85,7 +85,7 @@ const mi_heap_t _mi_heap_empty = { ATOMIC_VAR_INIT(NULL), 0, 0, - 0, + { {0}, {0}, 0 }, 0, false }; @@ -116,7 +116,7 @@ mi_heap_t _mi_heap_main = { #else 0xCDCDCDCDUL, #endif - 0, // random + { {0}, {0}, 0 }, // 
random 0, // page count false // can reclaim }; @@ -125,66 +125,6 @@ bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. mi_stats_t _mi_stats_main = { MI_STATS_NULL }; -/* ----------------------------------------------------------- - Initialization of random numbers ------------------------------------------------------------ */ - -#if defined(_WIN32) -#include -#elif defined(__APPLE__) -#include -#else -#include -#endif - -uintptr_t _mi_random_shuffle(uintptr_t x) { - #if (MI_INTPTR_SIZE==8) - // by Sebastiano Vigna, see: - x ^= x >> 30; - x *= 0xbf58476d1ce4e5b9UL; - x ^= x >> 27; - x *= 0x94d049bb133111ebUL; - x ^= x >> 31; - #elif (MI_INTPTR_SIZE==4) - // by Chris Wellons, see: - x ^= x >> 16; - x *= 0x7feb352dUL; - x ^= x >> 15; - x *= 0x846ca68bUL; - x ^= x >> 16; - #endif - return x; -} - -uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { -#ifdef __wasi__ // no ASLR when using WebAssembly, and time granularity may be coarse - uintptr_t x; - arc4random_buf(&x, sizeof x); -#else - // Hopefully, ASLR makes our function address random - uintptr_t x = (uintptr_t)((void*)&_mi_random_init); - x ^= seed; - // xor with high res time -#if defined(_WIN32) - LARGE_INTEGER pcount; - QueryPerformanceCounter(&pcount); - x ^= (uintptr_t)(pcount.QuadPart); -#elif defined(__APPLE__) - x ^= (uintptr_t)mach_absolute_time(); -#else - struct timespec time; - clock_gettime(CLOCK_MONOTONIC, &time); - x ^= (uintptr_t)time.tv_sec; - x ^= (uintptr_t)time.tv_nsec; -#endif - // and do a few randomization steps - uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; - for (uintptr_t i = 0; i < max; i++) { - x = _mi_random_shuffle(x); - } -#endif - return x; -} /* ----------------------------------------------------------- Initialization and freeing of the thread local heaps @@ -214,8 +154,8 @@ static bool _mi_heap_init(void) { mi_heap_t* heap = &td->heap; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); - heap->random = 
_mi_random_init(heap->thread_id); - heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(heap)) | 1; + _mi_random_init(&heap->random); + heap->cookie = _mi_heap_random_next(heap) | 1; heap->tld = tld; memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; @@ -451,16 +391,15 @@ void mi_process_init(void) mi_attr_noexcept { // access _mi_heap_default before setting _mi_process_is_initialized to ensure // that the TLS slot is allocated without getting into recursion on macOS // when using dynamic linking with interpose. - mi_heap_t* h = mi_get_default_heap(); + mi_get_default_heap(); _mi_process_is_initialized = true; _mi_heap_main.thread_id = _mi_thread_id(); _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); - uintptr_t random = _mi_random_init(_mi_heap_main.thread_id) ^ (uintptr_t)h; - #ifndef __APPLE__ - _mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random; + _mi_random_init(&_mi_heap_main.random); + #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. 
+ _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); #endif - _mi_heap_main.random = _mi_random_shuffle(random); mi_process_setup_auto_thread_done(); _mi_os_init(); #if (MI_DEBUG) diff --git a/src/memory.c b/src/memory.c index 9505c98f..3d6a22f5 100644 --- a/src/memory.c +++ b/src/memory.c @@ -79,7 +79,7 @@ typedef union mi_region_info_u { struct { bool valid; bool is_large; - int numa_node; + short numa_node; }; } mi_region_info_t; diff --git a/src/os.c b/src/os.c index 6cf89c99..9da209ad 100644 --- a/src/os.c +++ b/src/os.c @@ -409,8 +409,8 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); - init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB + uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all @@ -909,8 +909,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { // Initialize the start address after the 32TiB area start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + uintptr_t r = 
_mi_heap_random_next(mi_get_default_heap()); + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB #endif } end = start + size; diff --git a/src/page.c b/src/page.c index 2992bf09..471dca97 100644 --- a/src/page.c +++ b/src/page.c @@ -475,11 +475,12 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co // and initialize the free list by randomly threading through them // set up first element - size_t current = _mi_heap_random(heap) % slice_count; + const uintptr_t r = _mi_heap_random_next(heap); + size_t current = r % slice_count; counts[current]--; mi_block_t* const free_start = blocks[current]; - // and iterate through the rest - uintptr_t rnd = heap->random; + // and iterate through the rest; use `random_shuffle` for performance + uintptr_t rnd = _mi_random_shuffle(r); for (size_t i = 1; i < extend; i++) { // call random_shuffle only every INTPTR_SIZE rounds const size_t round = i%MI_INTPTR_SIZE; @@ -499,8 +500,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co } // prepend to the free list (usually NULL) mi_block_set_next(page, blocks[current], page->free); // end of the list - page->free = free_start; - heap->random = _mi_random_shuffle(rnd); + page->free = free_start; } static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) @@ -608,7 +608,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST - page->cookie = _mi_heap_random(heap) | 1; + page->cookie = _mi_heap_random_next(heap) | 1; #endif page->is_zero = page->is_zero_init; @@ -710,7 +710,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = mi_page_queue(heap,size); mi_page_t* page = pq->first; if 
(page != NULL) { - if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) { + if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) { // in secure mode, we extend half the time to increase randomness mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); diff --git a/src/random.c b/src/random.c new file mode 100644 index 00000000..063633ff --- /dev/null +++ b/src/random.c @@ -0,0 +1,290 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" + +#include // memset + +/* ---------------------------------------------------------------------------- +We use our own PRNG to keep predictable performance of random number generation +and to avoid implementations that use a lock. We only use the OS provided +random source to initialize the initial seeds. Since we do not need ultimate +performance but we do rely on the security (for secret cookies in secure mode) +we use a cryptographically secure generator (chacha20). +-----------------------------------------------------------------------------*/ + +#define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance? + + +/* ---------------------------------------------------------------------------- +Chacha20 implementation as the original algorithm with a 64-bit nonce +and counter: https://en.wikipedia.org/wiki/Salsa20 +The input matrix has sixteen 32-bit values: +Position 0 to 3: constant key +Position 4 to 11: the key +Position 12 to 13: the counter. +Position 14 to 15: the nonce. 
+ +The implementation uses regular C code which compiles very well on modern compilers. +(gcc x64 has no register spills, and clang 6+ uses SSE instructions) +-----------------------------------------------------------------------------*/ + +static inline uint32_t rotl(uint32_t x, uint32_t shift) { + return (x << shift) | (x >> (32 - shift)); +} + +static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) { + x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16); + x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12); + x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8); + x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); +} + +static void chacha_block(mi_random_ctx_t* r) +{ + // scramble into `x` + uint32_t x[16]; + for (size_t i = 0; i < 16; i++) { + x[i] = r->input[i]; + } + for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) { + qround(x, 0, 4, 8, 12); + qround(x, 1, 5, 9, 13); + qround(x, 2, 6, 10, 14); + qround(x, 3, 7, 11, 15); + qround(x, 0, 5, 10, 15); + qround(x, 1, 6, 11, 12); + qround(x, 2, 7, 8, 13); + qround(x, 3, 4, 9, 14); + } + + // add scrambled data to the initial state + for (size_t i = 0; i < 16; i++) { + r->output[i] = x[i] + r->input[i]; + } + r->output_available = 16; + + // increment the counter for the next round + r->input[12] += 1; + if (r->input[12] == 0) { + r->input[13] += 1; + if (r->input[13] == 0) { // and keep increasing into the nonce + r->input[14] += 1; + } + } +} + +static uint32_t chacha_next32(mi_random_ctx_t* r) { + if (r->output_available <= 0) { + chacha_block(r); + r->output_available = 16; // (assign again to suppress static analysis warning) + } + r->output_available--; + const uint32_t x = r->output[r->output_available]; + r->output[r->output_available] = 0; // reset once the data is handed out + return x; +} + +static inline uint32_t read32(const uint8_t* p, size_t idx32) { + const size_t i = 4*idx32; + return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24); +} + +static void 
chacha_init(mi_random_ctx_t* r, const uint8_t key[32], uint64_t nonce) +{ + // since we only use chacha for randomness (and not encryption) we + // do not _need_ to read 32-bit values as little endian but we do anyways + // just for being compatible :-) + memset(r, 0, sizeof(*r)); + for (size_t i = 0; i < 4; i++) { + const uint8_t* sigma = (uint8_t*)"expand 32-byte k"; + r->input[i] = read32(sigma,i); + } + for (size_t i = 0; i < 8; i++) { + r->input[i + 4] = read32(key,i); + } + r->input[12] = 0; + r->input[13] = 0; + r->input[14] = (uint32_t)nonce; + r->input[15] = (uint32_t)(nonce >> 32); +} + +static void chacha_split(mi_random_ctx_t* r, uint64_t nonce, mi_random_ctx_t* init) { + memset(init, 0, sizeof(*init)); + memcpy(init->input, r->input, sizeof(init->input)); + init->input[12] = 0; + init->input[13] = 0; + init->input[14] = (uint32_t)nonce; + init->input[15] = (uint32_t)(nonce >> 32); + mi_assert_internal(r->input[14] != init->input[14] || r->input[15] != init->input[15]); // do not reuse nonces! 
+ chacha_block(init); +} + + +/* ---------------------------------------------------------------------------- +Random interface +-----------------------------------------------------------------------------*/ + +#if MI_DEBUG>1 +static bool mi_random_is_initialized(mi_random_ctx_t* ctx) { + return (ctx != NULL && ctx->input[0] != 0); +} +#endif + +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx) { + mi_assert_internal(mi_random_is_initialized(ctx)); + mi_assert_internal(ctx != new_ctx); + chacha_split(ctx, (uintptr_t)new_ctx /*nonce*/, new_ctx); +} + +uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { + mi_assert_internal(mi_random_is_initialized(ctx)); + #if MI_INTPTR_SIZE <= 4 + return chacha_next32(ctx); + #elif MI_INTPTR_SIZE == 8 + return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); + #else + # error "define mi_random_next for this platform" + #endif +} + + +/* ---------------------------------------------------------------------------- +To initialize a fresh random context we rely on the OS: +- windows: BCryptGenRandom +- bsd,wasi: arc4random_buf +- linux: getrandom +If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. 
+-----------------------------------------------------------------------------*/ + +#if defined(_WIN32) +#pragma comment (lib,"bcrypt.lib") +#include +static bool os_random_buf(void* buf, size_t buf_len) { + return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); +} +/* +#define SystemFunction036 NTAPI SystemFunction036 +#include +#undef SystemFunction036 +static bool os_random_buf(void* buf, size_t buf_len) { + RtlGenRandom(buf, (ULONG)buf_len); + return true; +} +*/ +#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ + defined(__wasi__) +#include +static bool os_random_buf(void* buf, size_t buf_len) { + arc4random_buf(buf, buf_len); + return true; +} +#elif defined(__linux__) +#include +static bool os_random_buf(void* buf, size_t buf_len) { + return (getrandom(buf, buf_len, GRND_NONBLOCK) == (ssize_t)buf_len); +} +#else +static bool os_random_buf(void* buf, size_t buf_len) { + return false; +} +#endif + +#if defined(_WIN32) +#include +#elif defined(__APPLE__) +#include +#else +#include +#endif + +static uintptr_t os_random_weak(uintptr_t extra_seed) { + uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random + #if defined(_WIN32) + LARGE_INTEGER pcount; + QueryPerformanceCounter(&pcount); + x ^= (uintptr_t)(pcount.QuadPart); + #elif defined(__APPLE__) + x ^= (uintptr_t)mach_absolute_time(); + #else + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + x ^= (uintptr_t)time.tv_sec; + x ^= (uintptr_t)time.tv_nsec; + #endif + // and do a few randomization steps + uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; + for (uintptr_t i = 0; i < max; i++) { + x = _mi_random_shuffle(x); + } + mi_assert_internal(x != 0); + return x; +} + +void _mi_random_init(mi_random_ctx_t* ctx) { + uint8_t key[32]; + if (!os_random_buf(key, sizeof(key))) { + // if we fail to get random data from the OS, we 
fall back to a + // weak random source based on the current time + uintptr_t x = os_random_weak(0); + for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. + _mi_warning_message("unable to use secure randomness\n"); + x = _mi_random_shuffle(x); + ((uint32_t*)key)[i] = (uint32_t)x; + } + } + chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ ); +} + +/* -------------------------------------------------------- +test vectors from +----------------------------------------------------------- */ +/* +static bool array_equals(uint32_t* x, uint32_t* y, size_t n) { + for (size_t i = 0; i < n; i++) { + if (x[i] != y[i]) return false; + } + return true; +} +static void chacha_test(void) +{ + uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 }; + uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb }; + qround(x, 0, 1, 2, 3); + mi_assert_internal(array_equals(x, x_out, 4)); + + uint32_t y[16] = { + 0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a, + 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c, + 0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963, + 0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 }; + uint32_t y_out[16] = { + 0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a, + 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2, + 0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963, + 0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 }; + qround(y, 2, 7, 8, 13); + mi_assert_internal(array_equals(y, y_out, 16)); + + mi_random_ctx_t r = { + { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, + 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c, + 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c, + 0x00000001, 0x09000000, 0x4a000000, 0x00000000 }, + {0}, + 0 + }; + uint32_t r_out[16] = { + 0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3, + 0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3, + 0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9, + 0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 }; + chacha_block(&r); + mi_assert_internal(array_equals(r.output, r_out, 16)); +} +*/ \ 
No newline at end of file diff --git a/src/static.c b/src/static.c index d31fca8f..0519453e 100644 --- a/src/static.c +++ b/src/static.c @@ -14,6 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file // it will override all the standard library allocation // functions (on Unix's). #include "stats.c" +#include "random.c" #include "os.c" #include "arena.c" #include "memory.c" From e05a1edc038477574ee5c1e4ea00f0a7b9ab9e67 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 24 Dec 2019 10:32:44 -0800 Subject: [PATCH 126/293] fix large OS page size on Linux (issue #184, due to fix for #179) --- src/os.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/os.c b/src/os.c index 8f5afc5b..54088f83 100644 --- a/src/os.c +++ b/src/os.c @@ -171,9 +171,7 @@ void _mi_os_init() { os_page_size = (size_t)result; os_alloc_granularity = os_page_size; } - if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = (1UL << 21); // 2MiB - } + large_os_page_size = 2*MiB; // TODO: can we query the OS for this? } #endif From 49acc88924c7afd6a00c0836231e8923769fbe26 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 24 Dec 2019 10:38:13 -0800 Subject: [PATCH 127/293] Update readme.md --- readme.md | 1 + 1 file changed, 1 insertion(+) diff --git a/readme.md b/readme.md index 9d3974c9..32332c08 100644 --- a/readme.md +++ b/readme.md @@ -56,6 +56,7 @@ Enjoy! ### Releases +* 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. * 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. * 2019-10-07, `v1.1.0`: stable release 1.1. * 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. 
From ce02986d56cb69dd2f2d2b1a5c25260338665957 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 27 Dec 2019 22:30:23 -0800 Subject: [PATCH 128/293] variable renaming --- src/random.c | 72 ++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/random.c b/src/random.c index 063633ff..43e7dd5c 100644 --- a/src/random.c +++ b/src/random.c @@ -44,12 +44,12 @@ static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); } -static void chacha_block(mi_random_ctx_t* r) +static void chacha_block(mi_random_ctx_t* ctx) { // scramble into `x` uint32_t x[16]; for (size_t i = 0; i < 16; i++) { - x[i] = r->input[i]; + x[i] = ctx->input[i]; } for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) { qround(x, 0, 4, 8, 12); @@ -64,28 +64,28 @@ static void chacha_block(mi_random_ctx_t* r) // add scrambled data to the initial state for (size_t i = 0; i < 16; i++) { - r->output[i] = x[i] + r->input[i]; + ctx->output[i] = x[i] + ctx->input[i]; } - r->output_available = 16; + ctx->output_available = 16; // increment the counter for the next round - r->input[12] += 1; - if (r->input[12] == 0) { - r->input[13] += 1; - if (r->input[13] == 0) { // and keep increasing into the nonce - r->input[14] += 1; + ctx->input[12] += 1; + if (ctx->input[12] == 0) { + ctx->input[13] += 1; + if (ctx->input[13] == 0) { // and keep increasing into the nonce + ctx->input[14] += 1; } } } -static uint32_t chacha_next32(mi_random_ctx_t* r) { - if (r->output_available <= 0) { - chacha_block(r); - r->output_available = 16; // (assign again to suppress static analysis warning) +static uint32_t chacha_next32(mi_random_ctx_t* ctx) { + if (ctx->output_available <= 0) { + chacha_block(ctx); + ctx->output_available = 16; // (assign again to suppress static analysis warning) } - r->output_available--; - const uint32_t x = r->output[r->output_available]; - r->output[r->output_available] = 0; // reset 
once the data is handed out + const uint32_t x = ctx->output[16 - ctx->output_available]; + ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out + ctx->output_available--; return x; } @@ -94,34 +94,34 @@ static inline uint32_t read32(const uint8_t* p, size_t idx32) { return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24); } -static void chacha_init(mi_random_ctx_t* r, const uint8_t key[32], uint64_t nonce) +static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce) { // since we only use chacha for randomness (and not encryption) we // do not _need_ to read 32-bit values as little endian but we do anyways // just for being compatible :-) - memset(r, 0, sizeof(*r)); + memset(ctx, 0, sizeof(*ctx)); for (size_t i = 0; i < 4; i++) { const uint8_t* sigma = (uint8_t*)"expand 32-byte k"; - r->input[i] = read32(sigma,i); + ctx->input[i] = read32(sigma,i); } for (size_t i = 0; i < 8; i++) { - r->input[i + 4] = read32(key,i); + ctx->input[i + 4] = read32(key,i); } - r->input[12] = 0; - r->input[13] = 0; - r->input[14] = (uint32_t)nonce; - r->input[15] = (uint32_t)(nonce >> 32); + ctx->input[12] = 0; + ctx->input[13] = 0; + ctx->input[14] = (uint32_t)nonce; + ctx->input[15] = (uint32_t)(nonce >> 32); } -static void chacha_split(mi_random_ctx_t* r, uint64_t nonce, mi_random_ctx_t* init) { - memset(init, 0, sizeof(*init)); - memcpy(init->input, r->input, sizeof(init->input)); - init->input[12] = 0; - init->input[13] = 0; - init->input[14] = (uint32_t)nonce; - init->input[15] = (uint32_t)(nonce >> 32); - mi_assert_internal(r->input[14] != init->input[14] || r->input[15] != init->input[15]); // do not reuse nonces! 
- chacha_block(init); +static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) { + memset(ctx_new, 0, sizeof(*ctx_new)); + memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input)); + ctx_new->input[12] = 0; + ctx_new->input[13] = 0; + ctx_new->input[14] = (uint32_t)nonce; + ctx_new->input[15] = (uint32_t)(nonce >> 32); + mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces! + chacha_block(ctx_new); } @@ -135,10 +135,10 @@ static bool mi_random_is_initialized(mi_random_ctx_t* ctx) { } #endif -void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx) { +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) { mi_assert_internal(mi_random_is_initialized(ctx)); - mi_assert_internal(ctx != new_ctx); - chacha_split(ctx, (uintptr_t)new_ctx /*nonce*/, new_ctx); + mi_assert_internal(ctx != ctx_new); + chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new); } uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { From e3391d9a53c66f922c6e0ac12df4723701a05110 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 27 Dec 2019 23:33:50 -0800 Subject: [PATCH 129/293] stronger encoding of free lists using two keys per page --- include/mimalloc-internal.h | 58 +++++++++++++++++++++++++------------ include/mimalloc-types.h | 17 ++++++----- src/alloc.c | 8 ++--- src/heap.c | 2 ++ src/init.c | 30 ++++++++++++------- src/page.c | 14 ++++----- src/random.c | 2 +- src/segment.c | 2 +- 8 files changed, 83 insertions(+), 50 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e648c1ff..cdaac963 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -392,12 +392,28 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { } -// ------------------------------------------------------------------- -// Encoding/Decoding the free list next pointers -// Note: we pass a `null` value to be used as 
the `NULL` value for the -// end of a free list. This is to prevent the cookie itself to ever -// be present among user blocks (as `cookie^0==cookie`). -// ------------------------------------------------------------------- +/* ------------------------------------------------------------------- +Encoding/Decoding the free list next pointers + +This is to protect against buffer overflow exploits where the +free list is mutated. Many hardened allocators xor the next pointer `p` +with a secret key `k1`, as `p^k1`, but if the attacker can guess +the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). +Moreover, if multiple blocks can be read, the attacker can +xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot +about the pointers (and subsequently `k1`). + +Instead mimalloc uses an extra key `k2` and encode as `rotl(p+k2,13)^k1`. +Since these operations are not associative, the above approaches do not +work so well any more even if the `p` can be guesstimated. (We include +the rotation since xor and addition are otherwise linear in the lowest bit) +Both keys are unique per page. + +We also pass a separate `null` value to be used as `NULL` or otherwise +`rotl(k2,13)^k1` would appear (too) often as a sentinel value. 
+------------------------------------------------------------------- */ + +#define MI_ENCODE_ROTATE_BITS (13) static inline bool mi_is_in_same_segment(const void* p, const void* q) { return (_mi_ptr_segment(p) == _mi_ptr_segment(q)); @@ -412,49 +428,55 @@ static inline bool mi_is_in_same_page(const void* p, const void* q) { return (idxp == idxq); } -static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t cookie ) { +static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) { + return ((x << shift) | (x >> (MI_INTPTR_BITS - shift))); +} +static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { + return ((x >> shift) | (x << (MI_INTPTR_BITS - shift))); +} +static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) { #ifdef MI_ENCODE_FREELIST - mi_block_t* b = (mi_block_t*)(block->next ^ cookie); + mi_block_t* b = (mi_block_t*)(mi_rotr(block->next ^ key1, MI_ENCODE_ROTATE_BITS) - key2); if (mi_unlikely((void*)b==null)) { b = NULL; } return b; #else - UNUSED(cookie); UNUSED(null); + UNUSED(key1); UNUSED(key2); UNUSED(null); return (mi_block_t*)block->next; #endif } -static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) { +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) { #ifdef MI_ENCODE_FREELIST if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } - block->next = (mi_encoded_t)next ^ cookie; + block->next = mi_rotl((mi_encoded_t)next + key2, MI_ENCODE_ROTATE_BITS) ^ key1; #else - UNUSED(cookie); UNUSED(null); + UNUSED(key1); UNUSED(key2); UNUSED(null); block->next = (mi_encoded_t)next; #endif } static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { #ifdef MI_ENCODE_FREELIST - mi_block_t* next = mi_block_nextx(page,block,page->cookie); - // check for free list corruption: is 
`next` at least in our segment range? + mi_block_t* next = mi_block_nextx(page,block,page->key[0],page->key[1]); + // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? - if (next!=NULL && !mi_is_in_same_page(block, next)) { + if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); next = NULL; } return next; #else UNUSED(page); - return mi_block_nextx(page,block,0); + return mi_block_nextx(page,block,0,0); #endif } static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { #ifdef MI_ENCODE_FREELIST - mi_block_set_nextx(page,block,next, page->cookie); + mi_block_set_nextx(page,block,next, page->key[0], page->key[1]); #else UNUSED(page); - mi_block_set_nextx(page,block, next,0); + mi_block_set_nextx(page,block, next,0,0); #endif } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 1360c125..ab7d7c53 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -191,7 +191,7 @@ typedef struct mi_page_s { mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #ifdef MI_ENCODE_FREELIST - uintptr_t cookie; // random cookie to encode the free lists + uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) @@ -206,9 +206,9 @@ typedef struct mi_page_s { struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` // improve page index calculation - // without padding: 10 words on 64-bit, 11 on 32-bit. 
Secure adds one word - #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) - void* padding[1]; // 12 words on 64-bit with cookie, 12 words on 32-bit plain + // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds two words + #if (MI_INTPTR_SIZE==4) + void* padding[1]; // 12/14 words on 32-bit plain #endif } mi_page_t; @@ -239,8 +239,8 @@ typedef struct mi_segment_s { size_t capacity; // count of available pages (`#free + used`) size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. - uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` - + uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` + // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment @@ -289,8 +289,9 @@ struct mi_heap_s { mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") volatile _Atomic(mi_block_t*) thread_delayed_free; uintptr_t thread_id; // thread this heap belongs too - uintptr_t cookie; - mi_random_ctx_t random; // random number used for secure allocation + uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) + uintptr_t key[2]; // twb random keys used to encode the `thread_delayed_free` list + mi_random_ctx_t random; // random number context used for secure allocation size_t page_count; // total number of pages in the `pages` queues. 
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages }; diff --git a/src/alloc.c b/src/alloc.c index e68b48d2..714acc76 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -157,7 +157,7 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field + mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? { @@ -242,7 +242,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_t* dfree; do { dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap,block,dfree, heap->cookie); + mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } @@ -266,7 +266,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block // and push it on the free list if (mi_likely(local)) { // owning thread can free a block directly - if (mi_check_is_double_free(page, block)) return; + if (mi_unlikely(mi_check_is_double_free(page, block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; @@ -341,7 +341,7 @@ void mi_free(void* p) mi_attr_noexcept if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - if (mi_check_is_double_free(page,block)) return; + if (mi_unlikely(mi_check_is_double_free(page,block))) return; 
mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; diff --git a/src/heap.c b/src/heap.c index 6d6948df..f90c4624 100644 --- a/src/heap.c +++ b/src/heap.c @@ -193,6 +193,8 @@ mi_heap_t* mi_heap_new(void) { heap->thread_id = _mi_thread_id(); _mi_random_split(&bheap->random, &heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; + heap->key[0] = _mi_heap_random_next(heap); + heap->key[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe return heap; } diff --git a/src/init.c b/src/init.c index 768bc2bf..cadcd2a3 100644 --- a/src/init.c +++ b/src/init.c @@ -16,13 +16,13 @@ const mi_page_t _mi_page_empty = { { 0 }, false, NULL, // free #if MI_ENCODE_FREELIST - 0, + { 0, 0 }, #endif 0, // used NULL, ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) + #if (MI_INTPTR_SIZE==4) , { NULL } // padding #endif }; @@ -83,8 +83,9 @@ const mi_heap_t _mi_heap_empty = { MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, ATOMIC_VAR_INIT(NULL), - 0, - 0, + 0, // tid + 0, // cookie + { 0, 0 }, // keys { {0}, {0}, 0 }, 0, false @@ -105,18 +106,21 @@ static mi_tld_t tld_main = { { MI_STATS_NULL } // stats }; +#if MI_INTPTR_SIZE==8 +#define MI_INIT_COOKIE (0xCDCDCDCDCDCDCDCDUL) +#else +#define MI_INIT_COOKIE (0xCDCDCDCDUL) +#endif + mi_heap_t _mi_heap_main = { &tld_main, MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, NULL, - 0, // thread id -#if MI_INTPTR_SIZE==8 // the cookie of the main heap can be fixed (unlike page cookies that need to be secure!) - 0xCDCDCDCDCDCDCDCDUL, -#else - 0xCDCDCDCDUL, -#endif - { {0}, {0}, 0 }, // random + 0, // thread id + MI_INIT_COOKIE, // initial cookie + { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) 
+ { {0}, {0}, 0 }, // random 0, // page count false // can reclaim }; @@ -156,6 +160,8 @@ static bool _mi_heap_init(void) { heap->thread_id = _mi_thread_id(); _mi_random_init(&heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; + heap->key[0] = _mi_heap_random_next(heap); + heap->key[1] = _mi_heap_random_next(heap); heap->tld = tld; memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; @@ -399,6 +405,8 @@ void mi_process_init(void) mi_attr_noexcept { _mi_random_init(&_mi_heap_main.random); #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main); #endif mi_process_setup_auto_thread_done(); _mi_os_init(); diff --git a/src/page.c b/src/page.c index 471dca97..901fbda1 100644 --- a/src/page.c +++ b/src/page.c @@ -103,7 +103,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); #if MI_SECURE - mi_assert_internal(page->cookie != 0); + mi_assert_internal(page->key != 0); #endif if (page->heap!=NULL) { mi_segment_t* segment = _mi_page_segment(page); @@ -284,7 +284,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // and free them all while(block != NULL) { - mi_block_t* next = mi_block_nextx(heap,block, heap->cookie); + mi_block_t* next = mi_block_nextx(heap,block, heap->key[0], heap->key[1]); // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet @@ -292,9 +292,8 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { mi_block_t* dfree; do { dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap, block, dfree, heap->cookie); + mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]); } while 
(!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); - } block = next; } @@ -357,7 +356,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { #if MI_DEBUG>1 // check there are no references left.. - for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) { + for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->key[0], pheap->key[1])) { mi_assert_internal(_mi_ptr_page(block) != page); } #endif @@ -608,7 +607,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST - page->cookie = _mi_heap_random_next(heap) | 1; + page->key[0] = _mi_heap_random_next(heap); + page->key[1] = _mi_heap_random_next(heap); #endif page->is_zero = page->is_zero_init; @@ -621,7 +621,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->prev == NULL); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) - mi_assert_internal(page->cookie != 0); + mi_assert_internal(page->key != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page)); diff --git a/src/random.c b/src/random.c index 43e7dd5c..af6cd876 100644 --- a/src/random.c +++ b/src/random.c @@ -231,9 +231,9 @@ void _mi_random_init(mi_random_ctx_t* ctx) { if (!os_random_buf(key, sizeof(key))) { // if we fail to get random data from the OS, we fall back to a // weak random source based on the current time + _mi_warning_message("unable to use secure randomness\n"); uintptr_t x = os_random_weak(0); for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. 
- _mi_warning_message("unable to use secure randomness\n"); x = _mi_random_shuffle(x); ((uint32_t*)key)[i] = (uint32_t)x; } diff --git a/src/segment.c b/src/segment.c index f6ce939b..bbe88f82 100644 --- a/src/segment.c +++ b/src/segment.c @@ -520,7 +520,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_size = segment_size; segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); - segment->cookie = _mi_ptr_cookie(segment); + segment->cookie = _mi_ptr_cookie(segment); // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection From 77134e1ad072aa3bf3fd5e225f58ae88b48db589 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 28 Dec 2019 15:17:49 -0800 Subject: [PATCH 130/293] update free list encoding to stronger formula with addition last --- include/mimalloc-internal.h | 29 +++++++++++++++++------------ src/page.c | 2 +- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cdaac963..d41dfadc 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -397,24 +397,26 @@ Encoding/Decoding the free list next pointers This is to protect against buffer overflow exploits where the free list is mutated. Many hardened allocators xor the next pointer `p` -with a secret key `k1`, as `p^k1`, but if the attacker can guess +with a secret key `k1`, as `p^k1`. This prevents overwriting with known +values but might be still too weak: if the attacker can guess the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). -Moreover, if multiple blocks can be read, the attacker can +Moreover, if multiple blocks can be read as well, the attacker can xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot about the pointers (and subsequently `k1`). -Instead mimalloc uses an extra key `k2` and encode as `rotl(p+k2,13)^k1`. 
+Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<> (MI_INTPTR_BITS - shift))); } static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { + shift %= MI_INTPTR_BITS; return ((x >> shift) | (x << (MI_INTPTR_BITS - shift))); } + static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) { #ifdef MI_ENCODE_FREELIST - mi_block_t* b = (mi_block_t*)(mi_rotr(block->next ^ key1, MI_ENCODE_ROTATE_BITS) - key2); + mi_block_t* b = (mi_block_t*)(mi_rotr(block->next - key1, key1) ^ key2); if (mi_unlikely((void*)b==null)) { b = NULL; } return b; #else @@ -448,7 +453,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) { #ifdef MI_ENCODE_FREELIST if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } - block->next = mi_rotl((mi_encoded_t)next + key2, MI_ENCODE_ROTATE_BITS) ^ key1; + block->next = mi_rotl((uintptr_t)next ^ key2, key1) + key1; #else UNUSED(key1); UNUSED(key2); UNUSED(null); block->next = (mi_encoded_t)next; @@ -485,7 +490,7 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // ------------------------------------------------------------------- static inline uintptr_t _mi_random_shuffle(uintptr_t x) { - mi_assert_internal(x!=0); + if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros #if (MI_INTPTR_SIZE==8) // by Sebastiano Vigna, see: x ^= x >> 30; diff --git a/src/page.c b/src/page.c index 901fbda1..b070e56a 100644 --- a/src/page.c +++ b/src/page.c @@ -479,7 +479,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co counts[current]--; mi_block_t* const free_start = blocks[current]; // and iterate through the rest; use `random_shuffle` for performance - uintptr_t rnd = _mi_random_shuffle(r); + uintptr_t rnd = _mi_random_shuffle(r|1); // 
ensure not 0 for (size_t i = 1; i < extend; i++) { // call random_shuffle only every INTPTR_SIZE rounds const size_t round = i%MI_INTPTR_SIZE; From fc3e537bd4ac6d9ffec0243ec595ed15ca1649b8 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 28 Dec 2019 15:28:13 -0800 Subject: [PATCH 131/293] improve double free detection with faster same page check --- include/mimalloc-types.h | 2 +- src/alloc.c | 26 +++++++++++--------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index ab7d7c53..76539bd6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 0 +#define MI_SECURE 4 #endif // Define MI_DEBUG for debug mode diff --git a/src/alloc.c b/src/alloc.c index 714acc76..82d97786 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -140,28 +140,24 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons } static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { - size_t psize; - uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); - if (n == NULL || ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize))) { - // Suspicious: the decoded value is in the same page (or NULL). - // Walk the free lists to verify positively if it is already freed - if (mi_list_contains(page, page->free, block) || - mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) - { - _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); - return true; - } + // The decoded value is in the same page (or NULL). 
+ // Walk the free lists to verify positively if it is already freed + if (mi_list_contains(page, page->free, block) || + mi_list_contains(page, page->local_free, block) || + mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + { + _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); + return true; } return false; } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field - if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? - (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? + if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? + (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? { - // Suspicous: decoded value in block is in the same segment (or NULL) -- maybe a double free? + // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? 
// (continue in separate function to improve code generation) return mi_check_is_double_freex(page, block, n); } From 1b5a08cd25ee0034942df3d5f67dab2d891ba3c1 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 2 Jan 2020 17:24:32 -0800 Subject: [PATCH 132/293] remove unused parameter in check double free --- src/segment.c | 72 +++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/segment.c b/src/segment.c index bbe88f82..676df00a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -184,7 +184,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* mi_segment_protect_range(start, os_page_size, protect); } else { - // or protect every page + // or protect every page const size_t page_size = mi_segment_page_size(segment); for (size_t i = 0; i < segment->capacity; i++) { if (segment->pages[i].is_committed) { @@ -215,8 +215,8 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m } static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) -{ - mi_assert_internal(page->is_reset); +{ + mi_assert_internal(page->is_reset); mi_assert_internal(!segment->mem_is_fixed); page->is_reset = false; size_t psize; @@ -276,14 +276,14 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } mi_assert_internal((uintptr_t)p % block_size == 0); } - + if (page_size != NULL) *page_size = psize; mi_assert_internal(page->block_size==0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } -static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) +static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */; size_t guardsize = 0; @@ -331,16 +331,16 @@ static void 
mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_assert_internal(!segment->mem_is_fixed); mi_segment_protect(segment, false, tld->os); // ensure no more guard pages are set } - + bool any_reset = false; bool fully_committed = true; for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; + mi_page_t* page = &segment->pages[i]; if (!page->is_committed) { fully_committed = false; } if (page->is_reset) { any_reset = true; } } - if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { - fully_committed = false; + if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { + fully_committed = false; } if (segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { fully_committed = false; @@ -366,13 +366,13 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t return segment; } -static bool mi_segment_cache_full(mi_segments_tld_t* tld) +static bool mi_segment_cache_full(mi_segments_tld_t* tld) { // if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread size_t max_cache = mi_option_get(mi_option_segment_cache); if (tld->cache_count < max_cache && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache - ) { + ) { return false; } // take the opportunity to reduce the segment cache if it is too large (now) @@ -387,7 +387,7 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); - mi_assert_internal(segment->next == NULL); + mi_assert_internal(segment->next == NULL); if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { return false; } @@ -434,21 +434,21 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t pre_size; size_t 
segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); mi_assert_internal(segment_size >= required); - + // Initialize parameters - bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); - bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); + const bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager; // || (page_kind >= MI_PAGE_LARGE); bool pages_still_good = false; bool is_zero = false; - + // Try to get it from our thread local cache first mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { pages_still_good = true; } - else + else { if (MI_SECURE!=0) { mi_assert_internal(!segment->mem_is_fixed); @@ -458,7 +458,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, // TODO: optimize cache pop to return fitting pages if possible? 
for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (page->is_reset) { + if (page->is_reset) { if (!commit && mi_option_is_enabled(mi_option_reset_decommits)) { page->is_reset = false; } @@ -473,12 +473,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; } - } + } } else { // Allocate the segment from the OS size_t memid; - bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy + bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { @@ -489,12 +489,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } segment->memid = memid; segment->mem_is_fixed = mem_large; - segment->mem_is_committed = commit; + segment->mem_is_committed = commit; mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - if (!pages_still_good) { + if (!pages_still_good) { // zero the segment info (but not the `mem` fields) ptrdiff_t ofs = offsetof(mi_segment_t, next); memset((uint8_t*)segment + ofs, 0, info_size - ofs); @@ -520,12 +520,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_size = segment_size; segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); - segment->cookie = _mi_ptr_cookie(segment); + segment->cookie = _mi_ptr_cookie(segment); // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection mi_segment_protect(segment, true, tld->os); - + //fprintf(stderr,"mimalloc: alloc segment at %p\n", 
(void*)segment); return segment; } @@ -541,8 +541,8 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); mi_assert(segment->next == NULL); mi_assert(segment->prev == NULL); - _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - + _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); + if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache } @@ -569,12 +569,12 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* if (!page->segment_in_use) { // set in-use before doing unreset to prevent delayed reset page->segment_in_use = true; - segment->used++; + segment->used++; if (!page->is_committed) { mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!page->is_reset); page->is_committed = true; - if (segment->page_kind < MI_PAGE_LARGE || mi_option_is_enabled(mi_option_eager_page_commit)) { + if (segment->page_kind < MI_PAGE_LARGE || !mi_option_is_enabled(mi_option_eager_page_commit)) { size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); bool is_zero = false; @@ -586,7 +586,7 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* } if (page->is_reset) { mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? 
- } + } return page; } } @@ -608,7 +608,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - + // calculate the used size from the raw (non-aligned) start of the page //size_t pre_size; //_mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); @@ -621,7 +621,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg // note: must come after setting `segment_in_use` to false but before block_size becomes 0 mi_page_reset(segment, page, 0 /*used_size*/, tld); - // zero the page data, but not the segment fields + // zero the page data, but not the segment fields ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); segment->used--; @@ -674,7 +674,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { // remove the segment from the free page queue if needed mi_segment_remove_from_free_queue(segment,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); - + // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); @@ -691,7 +691,7 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment)); - segment->abandoned++; + segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); mi_assert_internal(segment->abandoned <= segment->used); if (segment->used == segment->abandoned) { @@ -744,7 +744,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_page_clear(segment,page,tld); } else { - // otherwise reclaim it + // otherwise reclaim it 
_mi_page_reclaim(heap,page); } } @@ -774,7 +774,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_page_t* page = mi_segment_find_free(segment, tld); - mi_assert_internal(page->segment_in_use); + mi_assert_internal(page->segment_in_use); mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity) { // if no more free pages, remove from the queue @@ -813,7 +813,7 @@ static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); - if (segment == NULL) return NULL; + if (segment == NULL) return NULL; mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); #if MI_DEBUG>=2 From 9629a0190f5eac495936e0b0970b4343c6abb975 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 2 Jan 2020 17:25:00 -0800 Subject: [PATCH 133/293] fix eager commit on large pages (issue #182) --- src/alloc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 82d97786..8ee78338 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -125,7 +125,7 @@ mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept { // ------------------------------------------------------ -// Check for double free in secure and debug mode +// Check for double free in secure and debug mode // This is somewhat expensive so only enabled for secure mode 4 // ------------------------------------------------------ @@ -139,12 +139,12 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons return false; } -static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const 
mi_block_t* n) { +static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or NULL). // Walk the free lists to verify positively if it is already freed if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) { _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); return true; @@ -156,11 +156,11 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? - { + { // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? 
// (continue in separate function to improve code generation) - return mi_check_is_double_freex(page, block, n); - } + return mi_check_is_double_freex(page, block); + } return false; } #else @@ -337,7 +337,7 @@ void mi_free(void* p) mi_attr_noexcept if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - if (mi_unlikely(mi_check_is_double_free(page,block))) return; + if (mi_unlikely(mi_check_is_double_free(page,block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; From f9ca88f71cbc3f43601ddedd6547f3a85c865bb5 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 2 Jan 2020 17:57:41 -0800 Subject: [PATCH 134/293] set secure default to 0 again --- include/mimalloc-types.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 76539bd6..d334489c 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 4 +#define MI_SECURE 0 #endif // Define MI_DEBUG for debug mode @@ -46,7 +46,7 @@ terms of the MIT license. A copy of the license can be found in the file // Encoded free lists allow detection of corrupted free lists // and can detect buffer overflows and double `free`s. -#if (MI_SECURE>=3 || MI_DEBUG>=1) +#if (MI_SECURE>=3 || MI_DEBUG>=1) #define MI_ENCODE_FREELIST 1 #endif @@ -109,8 +109,8 @@ terms of the MIT license. 
A copy of the license can be found in the file // (Except for large pages since huge objects are allocated in 4MiB chunks) #define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 16kb #define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb -#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2mb -#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) +#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2mb +#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) #define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) // Minimal alignment necessary. On most platforms 16 bytes are needed @@ -143,14 +143,14 @@ typedef enum mi_delayed_e { } mi_delayed_t; -// The `in_full` and `has_aligned` page flags are put in a union to efficiently +// The `in_full` and `has_aligned` page flags are put in a union to efficiently // test if both are false (`full_aligned == 0`) in the `mi_free` routine. typedef union mi_page_flags_s { uint8_t full_aligned; struct { uint8_t in_full : 1; uint8_t has_aligned : 1; - } x; + } x; } mi_page_flags_t; // Thread free list. 
@@ -182,7 +182,7 @@ typedef struct mi_page_s { uint8_t is_reset:1; // `true` if the page memory was reset uint8_t is_committed:1; // `true` if the page virtual memory is committed uint8_t is_zero_init:1; // `true` if the page was zero initialized - + // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory @@ -194,7 +194,7 @@ typedef struct mi_page_s { uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) - + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) volatile _Atomic(uintptr_t) thread_freed; // at least this number of blocks are in `thread_free` volatile _Atomic(mi_thread_free_t) thread_free; // list of deferred free blocks freed by other threads @@ -227,7 +227,7 @@ typedef enum mi_page_kind_e { typedef struct mi_segment_s { // memory fields size_t memid; // id for the os-level memory manager - bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) + bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) bool mem_is_committed; // `true` if the whole segment is eagerly committed // segment fields @@ -240,7 +240,7 @@ typedef struct mi_segment_s { size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. 
uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` - + // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment From eeb623e6af4d00d96a147a0d782298c5e4db987d Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 17:06:41 -0800 Subject: [PATCH 135/293] increase retire limit, collect retired pages --- include/mimalloc-types.h | 3 ++- src/init.c | 28 ++++++++++++--------- src/page.c | 54 +++++++++++++++++++++++++++++----------- 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index d334489c..68529c3f 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -187,7 +187,8 @@ typedef struct mi_page_s { uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) - bool is_zero; // `true` if the blocks in the free list are zero initialized + uint8_t is_zero:1; // `true` if the blocks in the free list are zero initialized + uint8_t retire_expire:7; // expiration count for retired blocks mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #ifdef MI_ENCODE_FREELIST diff --git a/src/init.c b/src/init.c index cadcd2a3..3df854cf 100644 --- a/src/init.c +++ b/src/init.c @@ -12,8 +12,12 @@ terms of the MIT license. 
A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, false, 0, 0, - { 0 }, false, + 0, false, false, false, false, + 0, // capacity + 0, // reserved capacity + { 0 }, // flags + false, // is_zero + 0, // retire_expire NULL, // free #if MI_ENCODE_FREELIST { 0, 0 }, @@ -83,11 +87,11 @@ const mi_heap_t _mi_heap_empty = { MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, ATOMIC_VAR_INIT(NULL), - 0, // tid - 0, // cookie - { 0, 0 }, // keys + 0, // tid + 0, // cookie + { 0, 0 }, // keys { {0}, {0}, 0 }, - 0, + 0, // page count false }; @@ -106,7 +110,7 @@ static mi_tld_t tld_main = { { MI_STATS_NULL } // stats }; -#if MI_INTPTR_SIZE==8 +#if MI_INTPTR_SIZE==8 #define MI_INIT_COOKIE (0xCDCDCDCDCDCDCDCDUL) #else #define MI_INIT_COOKIE (0xCDCDCDCDUL) @@ -121,8 +125,8 @@ mi_heap_t _mi_heap_main = { MI_INIT_COOKIE, // initial cookie { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) { {0}, {0}, 0 }, // random - 0, // page count - false // can reclaim + 0, // page count + false // can reclaim }; bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. 
@@ -136,7 +140,7 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL }; typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` - mi_tld_t tld; + mi_tld_t tld; } mi_thread_data_t; // Initialize the thread local default heap, called from `mi_thread_init` @@ -158,7 +162,7 @@ static bool _mi_heap_init(void) { mi_heap_t* heap = &td->heap; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); - _mi_random_init(&heap->random); + _mi_random_init(&heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; heap->key[0] = _mi_heap_random_next(heap); heap->key[1] = _mi_heap_random_next(heap); @@ -402,7 +406,7 @@ void mi_process_init(void) mi_attr_noexcept { _mi_heap_main.thread_id = _mi_thread_id(); _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); - _mi_random_init(&_mi_heap_main.random); + _mi_random_init(&_mi_heap_main.random); #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. 
_mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); diff --git a/src/page.c b/src/page.c index b070e56a..f5f51a72 100644 --- a/src/page.c +++ b/src/page.c @@ -229,7 +229,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_internal(page->heap == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_assert_internal(!page->is_reset); + mi_assert_internal(!page->is_reset); _mi_page_free_collect(page,false); mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); mi_page_queue_push(heap, pq, page); @@ -342,7 +342,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(page->heap != NULL); - + #if MI_DEBUG > 1 mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); #endif @@ -392,7 +392,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); } } - + // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) mi_segments_tld_t* segments_tld = &page->heap->tld->segments; @@ -420,20 +420,40 @@ void _mi_page_retire(mi_page_t* page) { // (or we end up retiring and re-allocating most of the time) // NOTE: refine this more: we should not retire if this // is the only page left with free blocks. It is not clear - // how to check this efficiently though... + // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. 
mi_page_queue_t* pq = mi_page_queue_of(page); - if (mi_likely(page->block_size <= (MI_SMALL_SIZE_MAX/4))) { - // if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) { - if (pq->last==page && pq->first==page) { + if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { + if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - return; // dont't retire after all + page->retire_expire = 2; + mi_assert_internal(mi_page_all_free(page)); + return; // dont't free after all } } _mi_page_free(page, pq, false); } +// free retired pages: we don't need to look at the entire queues +// since we only retire pages that are the last one in a queue. +static void mi_page_retired_collect(mi_heap_t* heap) { + for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) { + mi_page_t* page = pq->first; + if (page != NULL && page->retire_expire != 0) { + if (mi_page_all_free(page)) { + page->retire_expire--; + if (page->retire_expire == 0) { + _mi_page_free(pq->first, pq, false); + } + } + else { + page->retire_expire = 0; + } + } + } +} + /* ----------------------------------------------------------- Initialize the initial free list in a page. 
@@ -499,7 +519,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co } // prepend to the free list (usually NULL) mi_block_set_next(page, blocks[current], page->free); // end of the list - page->free = free_start; + page->free = free_start; } static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) @@ -513,15 +533,15 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); const size_t bsize = page->block_size; mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity); - + // initialize a sequential free list - mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); + mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); mi_block_t* block = start; while(block <= last) { mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); mi_block_set_next(page,block,next); block = next; - } + } // prepend to free list (usually `NULL`) mi_block_set_next(page, last, page->free); page->free = start; @@ -619,6 +639,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->thread_freed == 0); mi_assert_internal(page->next == NULL); mi_assert_internal(page->prev == NULL); + mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) mi_assert_internal(page->key != 0); @@ -699,8 +720,12 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p } else { mi_assert(pq->first == page); + page->retire_expire = 0; } mi_assert_internal(page == NULL || mi_page_immediate_available(page)); + + // finally collect retired pages + mi_page_retired_collect(heap); return page; } @@ -719,6 +744,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { 
_mi_page_free_collect(page,false); } if (mi_page_immediate_available(page)) { + page->retire_expire = 0; return page; // fast path } } @@ -759,7 +785,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { // that frees the block can free the whole page and segment directly. static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { size_t block_size = _mi_os_good_alloc_size(size); - mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); + mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); if (page != NULL) { mi_assert_internal(mi_page_immediate_available(page)); @@ -777,7 +803,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { _mi_stat_increase(&heap->tld->stats.huge, block_size); _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1); } - } + } return page; } From 2b108c8748410b81ca239c4f6a3639845d135587 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 21:39:18 -0800 Subject: [PATCH 136/293] increase retire expiration to 4 --- include/mimalloc-internal.h | 1 + src/heap.c | 5 +++-- src/page.c | 10 +++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d41dfadc..cfbd9782 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -90,6 +90,7 @@ void _mi_page_unfull(mi_page_t* page); void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... 
void _mi_heap_delayed_free(mi_heap_t* heap); +void _mi_heap_collect_retired(mi_heap_t* heap, bool force); void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay); size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); diff --git a/src/heap.c b/src/heap.c index f90c4624..963cb982 100644 --- a/src/heap.c +++ b/src/heap.c @@ -46,7 +46,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void #if MI_DEBUG>=3 -static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { +static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { UNUSED(arg1); UNUSED(arg2); UNUSED(pq); @@ -59,7 +59,7 @@ static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page static bool mi_heap_is_valid(mi_heap_t* heap) { mi_assert_internal(heap!=NULL); - mi_heap_visit_pages(heap, &_mi_heap_page_is_valid, NULL, NULL); + mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL); return true; } #endif @@ -84,6 +84,7 @@ typedef enum mi_collect_e { static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) { UNUSED(arg2); UNUSED(heap); + mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); mi_collect_t collect = *((mi_collect_t*)arg_collect); _mi_page_free_collect(page, collect >= ABANDON); if (mi_page_all_free(page)) { diff --git a/src/page.c b/src/page.c index f5f51a72..b0b500ca 100644 --- a/src/page.c +++ b/src/page.c @@ -426,7 +426,7 @@ void _mi_page_retire(mi_page_t* page) { if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { if (pq->last==page && pq->first==page) { // the only page in the queue? 
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = 2; + page->retire_expire = 4; mi_assert_internal(mi_page_all_free(page)); return; // dont't free after all } @@ -437,14 +437,14 @@ void _mi_page_retire(mi_page_t* page) { // free retired pages: we don't need to look at the entire queues // since we only retire pages that are the last one in a queue. -static void mi_page_retired_collect(mi_heap_t* heap) { +void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) { mi_page_t* page = pq->first; if (page != NULL && page->retire_expire != 0) { if (mi_page_all_free(page)) { page->retire_expire--; - if (page->retire_expire == 0) { - _mi_page_free(pq->first, pq, false); + if (force || page->retire_expire == 0) { + _mi_page_free(pq->first, pq, force); } } else { @@ -725,7 +725,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_assert_internal(page == NULL || mi_page_immediate_available(page)); // finally collect retired pages - mi_page_retired_collect(heap); + _mi_heap_collect_retired(heap,false); return page; } From d596f0856930a885007088ff52db8db051963da0 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 22:06:27 -0800 Subject: [PATCH 137/293] fix thread_free read in assertion --- src/alloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/alloc.c b/src/alloc.c index 8ee78338..bd81aba0 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -142,9 +142,10 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or NULL). 
// Walk the free lists to verify positively if it is already freed + mi_thread_free_t tf = (mi_thread_free_t)mi_atomic_read_relaxed(mi_atomic_cast(uintptr_t, &page->thread_free)); if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + mi_list_contains(page, mi_tf_block(tf), block)) { _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); return true; From a2a9230ad6e404e23a724fa8c820e3533a961716 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 22:52:52 -0800 Subject: [PATCH 138/293] remove empty page removal on page search (no longer needed with retired collection and delayed freeing) --- src/page.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/src/page.c b/src/page.c index b0b500ca..c38d7740 100644 --- a/src/page.c +++ b/src/page.c @@ -660,9 +660,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) { // search through the pages in "next fit" order - mi_page_t* rpage = NULL; size_t count = 0; - size_t page_free_count = 0; mi_page_t* page = pq->first; while( page != NULL) { @@ -674,20 +672,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 1. if the page contains free blocks, we are done if (mi_page_immediate_available(page)) { - // If all blocks are free, we might retire this page instead. - // do this at most 8 times to bound allocation time. 
- // (note: this can happen if a page was earlier not retired due - // to having neighbours that were mostly full or due to concurrent frees) - if (page_free_count < 8 && mi_page_all_free(page)) { - page_free_count++; - if (rpage != NULL) _mi_page_free(rpage,pq,false); - rpage = page; - page = next; - continue; // and keep looking - } - else { - break; // pick this one - } + break; // pick this one } // 2. Try to extend @@ -707,14 +692,6 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_stat_counter_increase(heap->tld->stats.searches,count); - if (page == NULL) { - page = rpage; - rpage = NULL; - } - if (rpage != NULL) { - _mi_page_free(rpage,pq,false); - } - if (page == NULL) { page = mi_page_fresh(heap, pq); } From 59fa2862941fe6c07c526d2221e2557492b3b1ab Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 4 Jan 2020 17:32:50 -0800 Subject: [PATCH 139/293] fix bug where continue would wrongly exit the do-while loop for delayed freeing --- src/page.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/page.c b/src/page.c index c38d7740..0df32f4c 100644 --- a/src/page.c +++ b/src/page.c @@ -119,23 +119,22 @@ bool _mi_page_is_valid(mi_page_t* page) { } #endif - -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) { +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay) { mi_thread_free_t tfree; mi_thread_free_t tfreex; - + mi_delayed_t old_delay; do { - tfreex = tfree = page->thread_free; - if (mi_unlikely(mi_tf_delayed(tfree) < MI_DELAYED_FREEING)) { - tfreex = mi_tf_set_delayed(tfree,delay); - } - else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) { + tfree = mi_atomic_read_relaxed(&page->thread_free); + tfreex = mi_tf_set_delayed(tfree, delay); + old_delay = mi_tf_delayed(tfree); + if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. 
- continue; // and try again } - } - while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal - !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + else if (delay == old_delay) { + break; // avoid atomic operation if already equal + } + } while ((old_delay == MI_DELAYED_FREEING) || + !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t, &page->thread_free), tfreex, tfree)); } From 45582d1fb5e076a334fb9c5fd704da9b7312dc5b Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 5 Jan 2020 13:58:49 -0800 Subject: [PATCH 140/293] revert a2a9230 (remove empty page removal on search): this is not generally valid when concurrent frees do not always add to thread_delayed_free. --- src/page.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 0df32f4c..78570ab0 100644 --- a/src/page.c +++ b/src/page.c @@ -659,7 +659,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) { // search through the pages in "next fit" order + mi_page_t* rpage = NULL; size_t count = 0; + size_t page_free_count = 0; mi_page_t* page = pq->first; while( page != NULL) { @@ -671,7 +673,20 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 1. if the page contains free blocks, we are done if (mi_page_immediate_available(page)) { - break; // pick this one + // If all blocks are free, we might retire this page instead. + // do this at most 8 times to bound allocation time. 
+ // (note: this can happen if a page was earlier not retired due + // to having neighbours that were mostly full or due to concurrent frees) + if (page_free_count < 8 && mi_page_all_free(page)) { + page_free_count++; + if (rpage != NULL) _mi_page_free(rpage,pq,false); + rpage = page; + page = next; + continue; // and keep looking + } + else { + break; // pick this one + } } // 2. Try to extend @@ -691,6 +706,14 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_stat_counter_increase(heap->tld->stats.searches,count); + if (page == NULL) { + page = rpage; + rpage = NULL; + } + if (rpage != NULL) { + _mi_page_free(rpage,pq,false); + } + if (page == NULL) { page = mi_page_fresh(heap, pq); } From d8d69c2c94d0314e546f91bae8f19826aedf1e14 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 5 Jan 2020 22:07:16 -0800 Subject: [PATCH 141/293] disable MAP_NORESERVE on huge pages --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index d7126e70..c9a04d27 100644 --- a/src/os.c +++ b/src/os.c @@ -331,7 +331,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); } else { - int lflags = flags; + int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux int lfd = fd; #ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; From 743e89173819a9fe3283fb94f4f6830d2f648186 Mon Sep 17 00:00:00 2001 From: Kirsten Lee Date: Mon, 6 Jan 2020 16:18:22 -0800 Subject: [PATCH 142/293] add stl mimalloc wrapper --- CMakeLists.txt | 3 +- ide/vs2017/mimalloc-override.vcxproj | 3 +- ide/vs2017/mimalloc.vcxproj | 3 +- ide/vs2019/mimalloc-override.vcxproj | 3 +- ide/vs2019/mimalloc.vcxproj | 3 +- include/mimalloc-stl-allocator.h | 44 ++++++++++++++++++++++++++++ test/test-api.c | 31 ++++++++++++++++++++ 7 files changed, 85 insertions(+), 5 deletions(-) create mode 100644 include/mimalloc-stl-allocator.h 
diff --git a/CMakeLists.txt b/CMakeLists.txt index 467fad95..dcbdefef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,6 +187,7 @@ install(TARGETS mimalloc-static EXPORT mimalloc DESTINATION ${mi_install_dir}) install(FILES include/mimalloc.h DESTINATION ${mi_install_dir}/include) install(FILES include/mimalloc-override.h DESTINATION ${mi_install_dir}/include) install(FILES include/mimalloc-new-delete.h DESTINATION ${mi_install_dir}/include) +install(FILES include/mimalloc-stl-allocator.h DESTINATION ${mi_install_dir}/include) install(FILES cmake/mimalloc-config.cmake DESTINATION ${mi_install_dir}/cmake) install(FILES cmake/mimalloc-config-version.cmake DESTINATION ${mi_install_dir}/cmake) install(EXPORT mimalloc DESTINATION ${mi_install_dir}/cmake) @@ -233,7 +234,7 @@ if (MI_BUILD_TESTS MATCHES "ON") target_compile_options(mimalloc-test-stress PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-stress PRIVATE include) target_link_libraries(mimalloc-test-stress PRIVATE mimalloc-static ${mi_libraries}) - + enable_testing() add_test(test_api, mimalloc-test-api) add_test(test_stress, mimalloc-test-stress) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 511c0fab..e0a6d85b 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -1,4 +1,4 @@ - + @@ -214,6 +214,7 @@ + diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 6147c349..ff6c8edb 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -1,4 +1,4 @@ - + @@ -239,6 +239,7 @@ + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 96a8924f..e6416e05 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -1,4 +1,4 @@ - + @@ -214,6 +214,7 @@ + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 28e96d71..ffede6ca 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ 
b/ide/vs2019/mimalloc.vcxproj @@ -1,4 +1,4 @@ - + @@ -239,6 +239,7 @@ + diff --git a/include/mimalloc-stl-allocator.h b/include/mimalloc-stl-allocator.h new file mode 100644 index 00000000..11ba30fb --- /dev/null +++ b/include/mimalloc-stl-allocator.h @@ -0,0 +1,44 @@ +#pragma once +#ifndef MIMALLOC_STL_ALLOCATOR_H +#define MIMALLOC_STL_ALLOCATOR_H + +#ifdef __cplusplus +/* ---------------------------------------------------------------------------- +This header can be used to hook mimalloc into STL containers in place of +std::allocator. +-----------------------------------------------------------------------------*/ +#include +#include // true_type + +#pragma warning(disable: 4100) + +template +struct mi_stl_allocator { + typedef T value_type; + + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::true_type; + using propagate_on_container_swap = std::true_type; + using is_always_equal = std::true_type; + + mi_stl_allocator() noexcept {} + mi_stl_allocator(const mi_stl_allocator& other) noexcept {} + template + mi_stl_allocator(const mi_stl_allocator& other) noexcept {} + + T* allocate(size_t n, const void* hint = 0) { + return (T*)mi_mallocn(n, sizeof(T)); + } + + void deallocate(T* p, size_t n) { + mi_free(p); + } +}; + +template +bool operator==(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return true; } +template +bool operator!=(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return false; } + +#endif // __cplusplus +#endif // MIMALLOC_STL_ALLOCATOR_H diff --git a/test/test-api.c b/test/test-api.c index bd2291da..7a9ee785 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -25,8 +25,10 @@ we therefore test the API over various inputs. 
Please add more tests :-) #include #include #include +#include #include "mimalloc.h" #include "mimalloc-internal.h" +#include "mimalloc-stl-allocator.h" // --------------------------------------------------------------------------- // Test macros: CHECK(name,predicate) and CHECK_BODY(name,body) @@ -61,6 +63,8 @@ static int failed = 0; // --------------------------------------------------------------------------- bool test_heap1(); bool test_heap2(); +bool test_stl_allocator1(); +bool test_stl_allocator2(); // --------------------------------------------------------------------------- // Main testing @@ -150,6 +154,9 @@ int main() { mi_free(s); }); + CHECK("stl_allocator1", test_stl_allocator1()); + CHECK("stl_allocator2", test_stl_allocator2()); + // --------------------------------------------------- // Done // ---------------------------------------------------[] @@ -182,3 +189,27 @@ bool test_heap2() { mi_free(p2); return true; } + +bool test_stl_allocator1() { +#ifdef __cplusplus + std::vector> vec; + vec.push_back(1); + vec.pop_back(); + return vec.size() == 0; +#else + return true; +#endif +} + +bool test_stl_allocator2() { +#ifdef __cplusplus + struct some_struct { int i; int j; double z; }; + + std::vector> vec; + vec.push_back(some_struct()); + vec.pop_back(); + return vec.size() == 0; +#else + return true; +#endif +} From d97c56d4c10d9161d7d5b8bec43f67b8f291b67f Mon Sep 17 00:00:00 2001 From: Kirsten Lee Date: Mon, 6 Jan 2020 16:25:21 -0800 Subject: [PATCH 143/293] fix unintended diff --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dcbdefef..93560951 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -234,7 +234,7 @@ if (MI_BUILD_TESTS MATCHES "ON") target_compile_options(mimalloc-test-stress PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-stress PRIVATE include) target_link_libraries(mimalloc-test-stress PRIVATE mimalloc-static ${mi_libraries}) - + enable_testing() 
add_test(test_api, mimalloc-test-api) add_test(test_stress, mimalloc-test-stress) From 0a2520490b951d791ebc9b34e8eae69e65fbeda6 Mon Sep 17 00:00:00 2001 From: Kirsten Lee Date: Mon, 6 Jan 2020 16:44:55 -0800 Subject: [PATCH 144/293] only include vector header when compiling c++ --- test/test-api.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/test-api.c b/test/test-api.c index 7a9ee785..f93884d0 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -25,7 +25,11 @@ we therefore test the API over various inputs. Please add more tests :-) #include #include #include + +#ifdef __cplusplus #include +#endif + #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-stl-allocator.h" From 4223caac0fa95b900f89963d99f7c0d1d03a2217 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 6 Jan 2020 22:08:21 -0800 Subject: [PATCH 145/293] on Linux dynamically detect if getrandom is supported and fall back to /dev/urandom if needed --- src/random.c | 48 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/src/random.c b/src/random.c index af6cd876..c40a96da 100644 --- a/src/random.c +++ b/src/random.c @@ -155,9 +155,9 @@ uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { /* ---------------------------------------------------------------------------- To initialize a fresh random context we rely on the OS: -- windows: BCryptGenRandom -- bsd,wasi: arc4random_buf -- linux: getrandom +- Windows : BCryptGenRandom +- osX,bsd,wasi: arc4random_buf +- Linux : getrandom,/dev/urandom If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. 
-----------------------------------------------------------------------------*/ @@ -185,9 +185,47 @@ static bool os_random_buf(void* buf, size_t buf_len) { return true; } #elif defined(__linux__) -#include +#include +#include +#include +#include +#include +#include static bool os_random_buf(void* buf, size_t buf_len) { - return (getrandom(buf, buf_len, GRND_NONBLOCK) == (ssize_t)buf_len); + // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` + // and for the latter the actual `getrandom` call is not always defined. + // (see ) + // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. +#ifdef SYS_getrandom + #ifndef GRND_NONBLOCK + #define GRND_NONBLOCK (1) + #endif + static volatile _Atomic(uintptr_t) no_getrandom; // = 0 + if (mi_atomic_read(&no_getrandom)==0) { + ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); + if (ret >= 0) return (buf_len == (size_t)ret); + if (ret != ENOSYS) return false; + mi_atomic_write(&no_getrandom,1); // don't call again, and fall back to /dev/urandom + } +#endif + int flags = O_RDONLY; + #if defined(O_CLOEXEC) + flags |= O_CLOEXEC; + #endif + int fd = open("/dev/urandom", flags, 0); + if (fd < 0) return false; + size_t count = 0; + while(count < buf_len) { + ssize_t ret = read(fd, (char*)buf + count, buf_len - count); + if (ret<=0) { + if (errno!=EAGAIN && errno!=EINTR) break; + } + else { + count += ret; + } + } + close(fd); + return (count==buf_len); } #else static bool os_random_buf(void* buf, size_t buf_len) { From d4ab0ff08c46bb87ec666e91cecd5b2675388be2 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 7 Jan 2020 14:15:37 -0800 Subject: [PATCH 146/293] fix timeout on huge page reservation if set to 0 --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 90ea2b40..b5d41a1a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -325,7 +325,7 @@ int 
mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; - const size_t timeout_per = (timeout_msecs / numa_count) + 50; + const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); // reserve evenly among numa nodes for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { From 50b3f6d7aef19abbe6a985d9be6fa0f7aeb11098 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 12:58:07 -0800 Subject: [PATCH 147/293] fix assertion --- src/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index 3d6a22f5..ee84f755 100644 --- a/src/memory.c +++ b/src/memory.c @@ -308,7 +308,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo if (mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) { // some blocks are still reset mi_assert_internal(!info.is_large); - mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); + mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx); if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed bool reset_zero = false; From 5d2f111f64a788108466e89797d6ddafde1163f4 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 12:59:20 -0800 Subject: [PATCH 148/293] make the stress test do more iterations under a smaller load to stay under 1GiB committed and increase thread interaction --- test/test-stress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index b549e1b4..924dbce1 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -26,8 +26,8 @@ terms of the MIT license.
// // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int SCALE = 50; // scaling factor -static int ITER = 10; // N full iterations destructing and re-creating all threads +static int SCALE = 10; // scaling factor +static int ITER = 50; // N full iterations destructing and re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor @@ -209,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif } From 683d8998d4d56fbb92e447029f36d8ddbfbbf452 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 17:45:38 -0800 Subject: [PATCH 149/293] fix potential A-B-A problem with segment abandonment; noticed by Manual Poeter and Sam Gross --- include/mimalloc-types.h | 2 +- src/segment.c | 80 ++++++++++++++++++++++++++++------------ test/test-stress.c | 6 +-- 3 files changed, 60 insertions(+), 28 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 68529c3f..da9bfbac 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -234,7 +234,7 @@ typedef struct mi_segment_s { // segment fields struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` struct mi_segment_s* prev; - volatile _Atomic(struct mi_segment_s*) abandoned_next; + struct mi_segment_s* abandoned_next; size_t abandoned; // abandoned pages (i.e. 
the original owning thread stopped) (`abandoned <= used`) size_t used; // count of pages in use (`used <= capacity`) size_t capacity; // count of available pages (`#free + used`) diff --git a/src/segment.c b/src/segment.c index 676df00a..97859fa9 100644 --- a/src/segment.c +++ b/src/segment.c @@ -663,7 +663,28 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) // are "abandoned" and will be reclaimed by other threads to // reuse their pages and/or free them eventually static volatile _Atomic(mi_segment_t*) abandoned; // = NULL; -static volatile _Atomic(uintptr_t) abandoned_count; // = 0; +static volatile _Atomic(uintptr_t) abandoned_count; // = 0; approximate count of abandoned segments + +// prepend a list of abandoned segments atomically to the global abandoned list; O(n) +static void mi_segments_prepend_abandoned(mi_segment_t* first) { + if (first == NULL) return; + + // first try if the abandoned list happens to be NULL + if (mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, NULL)) return; + + // if not, find the end of the list + mi_segment_t* last = first; + while (last->abandoned_next != NULL) { + last = last->abandoned_next; + } + + // and atomically prepend + mi_segment_t* next; + do { + next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); + last->abandoned_next = next; + } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, next)); +} static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); @@ -679,12 +700,9 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); segment->thread_id = 0; - mi_segment_t* next; - do { - next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&abandoned)); - 
mi_atomic_write_ptr(mi_atomic_cast(void*,&segment->abandoned_next), next); - } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), segment, next)); - mi_atomic_increment(&abandoned_count); + segment->abandoned_next = NULL; + mi_segments_prepend_abandoned(segment); // prepend one-element list + mi_atomic_increment(&abandoned_count); // keep approximate count } void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { @@ -701,24 +719,35 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { } bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) { - uintptr_t reclaimed = 0; - uintptr_t atmost; - if (try_all) { - atmost = abandoned_count+16; // close enough - } - else { - atmost = abandoned_count/8; // at most 1/8th of all outstanding (estimated) + // To avoid the A-B-A problem, grab the entire list atomically + mi_segment_t* segment = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); // pre-read to avoid expensive atomic operations + if (segment == NULL) return false; + segment = (mi_segment_t*)mi_atomic_exchange_ptr(mi_atomic_cast(void*, &abandoned), NULL); + if (segment == NULL) return false; + + // we got a non-empty list + if (!try_all) { + // take at most 1/8th of the list and append the rest back to the abandoned list again + // this is O(n) but simplifies the code a lot (as we don't have an A-B-A problem) + // and probably ok since the length will tend to be not too large. 
+ uintptr_t atmost = mi_atomic_read(&abandoned_count)/8; // at most 1/8th of all outstanding (estimated) if (atmost < 8) atmost = 8; // but at least 8 + + // find the split point + mi_segment_t* last = segment; + while (last->abandoned_next != NULL && atmost > 0) { + last = last->abandoned_next; + atmost--; + } + // split the list and push back the remaining segments + mi_segment_t* next = last->abandoned_next; + last->abandoned_next = NULL; + mi_segments_prepend_abandoned(next); } - // for `atmost` `reclaimed` abandoned segments... - while(atmost > reclaimed) { - // try to claim the head of the abandoned segments - mi_segment_t* segment; - do { - segment = (mi_segment_t*)abandoned; - } while(segment != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), (mi_segment_t*)segment->abandoned_next, segment)); - if (segment==NULL) break; // stop early if no more segments available + // reclaim all segments that we kept + while(segment != NULL) { + mi_segment_t* const next = segment->abandoned_next; // save the next segment // got it. 
mi_atomic_decrement(&abandoned_count); @@ -754,14 +783,17 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_free(segment,false,tld); } else { - reclaimed++; // add its free pages to the the current thread free small segment queue if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { mi_segment_insert_in_free_queue(segment,tld); } } + + // go on + segment = next; } - return (reclaimed>0); + + return true; } diff --git a/test/test-stress.c b/test/test-stress.c index 924dbce1..23137b97 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -135,9 +135,9 @@ static void stress(intptr_t tid) { allocs--; if (data_top >= data_size) { data_size += 100000; - data = (void**)custom_realloc(data, data_size * sizeof(void*)); + data = (void**)custom_realloc(data, data_size * sizeof(void*)); } - data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r); + data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r); } else { // 25% retain @@ -209,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } #endif } From 940df53b0afc8b114676bf3fd41b9505db2abf0d Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 17:51:11 -0800 Subject: [PATCH 150/293] fix iteration count display in stress test --- test/test-stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-stress.c b/test/test-stress.c index 23137b97..d295f741 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -209,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif } From 12ef2816ed71be907647a190f4139c6639d49dde 
Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 19:00:03 -0800 Subject: [PATCH 151/293] fix bug exposed by commit 59fa286 where reclaimed pages could be stuck to NEVER_DELAYED --- include/mimalloc-internal.h | 2 +- src/heap.c | 4 ++-- src/page.c | 13 +++++++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cfbd9782..3042e6f9 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -92,7 +92,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // void _mi_heap_delayed_free(mi_heap_t* heap); void _mi_heap_collect_retired(mi_heap_t* heap, bool force); -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay); +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); void _mi_deferred_free(mi_heap_t* heap, bool force); diff --git a/src/heap.c b/src/heap.c index 963cb982..5c1f8d38 100644 --- a/src/heap.c +++ b/src/heap.c @@ -103,7 +103,7 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq UNUSED(arg2); UNUSED(heap); UNUSED(pq); - _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); return true; // don't break } @@ -242,7 +242,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ UNUSED(pq); // ensure no more thread_delayed_free will be added - _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); // stats if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { diff --git a/src/page.c b/src/page.c index 78570ab0..7491bd61 100644 --- a/src/page.c +++ b/src/page.c @@ -119,7 +119,7 @@ bool _mi_page_is_valid(mi_page_t* page) { } #endif -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay) { +void 
_mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { mi_thread_free_t tfree; mi_thread_free_t tfreex; mi_delayed_t old_delay; @@ -133,11 +133,13 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay) { else if (delay == old_delay) { break; // avoid atomic operation if already equal } + else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) { + break; // leave never set + } } while ((old_delay == MI_DELAYED_FREEING) || !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t, &page->thread_free), tfreex, tfree)); } - /* ----------------------------------------------------------- Page collect the `local_free` and `thread_free` lists ----------------------------------------------------------- */ @@ -229,9 +231,12 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_internal(page->heap == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(!page->is_reset); + mi_assert_internal(mi_tf_delayed(page->thread_free) == MI_NEVER_DELAYED_FREE); _mi_page_free_collect(page,false); mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); mi_page_queue_push(heap, pq, page); + mi_assert_internal(page->heap != NULL); + _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, true); // override never (after push so heap is set) mi_assert_expensive(_mi_page_is_valid(page)); } @@ -308,7 +313,7 @@ void _mi_page_unfull(mi_page_t* page) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(mi_page_is_in_full(page)); - _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, false); if (!mi_page_is_in_full(page)) return; mi_heap_t* heap = page->heap; @@ -324,7 +329,7 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(!mi_page_immediate_available(page)); mi_assert_internal(!mi_page_is_in_full(page)); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE); + 
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false); if (mi_page_is_in_full(page)) return; mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page); From 8f75444e7a07d8a6a56302855ad1094121bd4c90 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:21:32 -0800 Subject: [PATCH 152/293] fix windows debug build at MI_DEBUG=2 --- src/heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/heap.c b/src/heap.c index 5c1f8d38..4a589e5c 100644 --- a/src/heap.c +++ b/src/heap.c @@ -45,7 +45,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void } -#if MI_DEBUG>=3 +#if MI_DEBUG>=2 static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { UNUSED(arg1); UNUSED(arg2); From 403276d11e10bebb1d20c93b210258de3f02d995 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:27:18 -0800 Subject: [PATCH 153/293] build release and debug build on Windows --- azure-pipelines.yml | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 41d67f86..5056ee34 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -13,16 +13,24 @@ jobs: pool: vmImage: windows-2019 + strategy: + matrix: + Debug: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + Release: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release steps: - task: CMake@1 inputs: - workingDirectory: 'build' - cmakeArgs: .. + workingDirectory: $(BuildType) + cmakeArgs: .. 
$(cmakeExtraArgs) - task: MSBuild@1 inputs: - solution: build/libmimalloc.sln - - upload: $(Build.SourcesDirectory)/build - artifact: windows + solution: $(BuildType)/libmimalloc.sln + - upload: $(Build.SourcesDirectory)/$(BuildType) + artifact: mimalloc-windows-$(BuildType) - job: displayName: Linux @@ -75,7 +83,7 @@ jobs: displayName: Ctest - upload: $(Build.SourcesDirectory)/$(BuildType) - artifact: ubuntu-$(BuildType) + artifact: mimalloc-ubuntu-$(BuildType) - job: displayName: macOS @@ -89,4 +97,4 @@ jobs: cmakeArgs: .. - script: make -j$(sysctl -n hw.ncpu) -C build - upload: $(Build.SourcesDirectory)/build - artifact: macos + artifact: mimalloc-macos From ce3f327f211418aaaac874a961ea92fe1fb8e013 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:40:57 -0800 Subject: [PATCH 154/293] add test pass to Windows build --- azure-pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5056ee34..b9376e52 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -29,6 +29,7 @@ jobs: - task: MSBuild@1 inputs: solution: $(BuildType)/libmimalloc.sln + - task: CTest@1 - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) From 7575b58d7ac4abe84b16c4befefdfe1618ce4347 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:46:56 -0800 Subject: [PATCH 155/293] fix test on Windows in azure pipelines --- azure-pipelines.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b9376e52..9da5ffa5 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -29,7 +29,10 @@ jobs: - task: MSBuild@1 inputs: solution: $(BuildType)/libmimalloc.sln - - task: CTest@1 + - displayName: CTest + script: | + cd $(BuildType) + ctest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) From 313d4b8ffd1bb741a3f4ab7b883b71e4913c8c5d Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 
2020 23:47:40 -0800 Subject: [PATCH 156/293] fix test on Windows in azure pipelines --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 9da5ffa5..ad5f42cb 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -29,10 +29,10 @@ jobs: - task: MSBuild@1 inputs: solution: $(BuildType)/libmimalloc.sln - - displayName: CTest - script: | + - script: | cd $(BuildType) ctest + displayName: CTest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) From be10ebea35652e7cde14c42a8a9ab972efaafb9c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:54:56 -0800 Subject: [PATCH 157/293] build debug and secure versions on macOS in Azure pipelines --- azure-pipelines.yml | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ad5f42cb..f88b2e1a 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -21,6 +21,9 @@ jobs: Release: BuildType: release cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + Secure: + BuildType: secure + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON steps: - task: CMake@1 inputs: @@ -32,7 +35,7 @@ jobs: - script: | cd $(BuildType) ctest - displayName: CTest + displayName: CTest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) @@ -73,19 +76,15 @@ jobs: CXX: clang++ BuildType: secure-clang cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON - steps: - task: CMake@1 inputs: workingDirectory: $(BuildType) cmakeArgs: .. 
$(cmakeExtraArgs) - - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: make test -C $(BuildType) - displayName: Ctest - + displayName: CTest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-ubuntu-$(BuildType) @@ -94,11 +93,25 @@ jobs: pool: vmImage: macOS-10.14 + strategy: + matrix: + Debug: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + Release: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + Secure: + BuildType: secure + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON steps: - task: CMake@1 inputs: - workingDirectory: 'build' - cmakeArgs: .. - - script: make -j$(sysctl -n hw.ncpu) -C build - - upload: $(Build.SourcesDirectory)/build - artifact: mimalloc-macos + workingDirectory: $(BuildType) + cmakeArgs: .. $(cmakeExtraArgs) + - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) + displayName: Make + - script: make test -C $(BuildType) + displayName: CTest + - upload: $(Build.SourcesDirectory)/$(BuildType) + artifact: mimalloc-macos-$(BuildType) From 5f61a9e89673c6a361b4b34b4db258181e8e415b Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 9 Jan 2020 17:52:28 -0800 Subject: [PATCH 158/293] add mprotect error when the mmap limit might be reached in secure mode (see issue #77) --- src/os.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/os.c b/src/os.c index c9a04d27..b5bd0ad9 100644 --- a/src/os.c +++ b/src/os.c @@ -596,6 +596,18 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* return mi_os_page_align_areax(true, addr, size, newsize); } +static void mi_mprotect_hint(int err) { +#if defined(MI_OS_USE_MMAP) && (MI_SECURE>=2) // guard page around every mimalloc page + if (err == ENOMEM) { + _mi_warning_message("the previous warning may have been caused by a low memory map limit.\n" + " On Linux this is controlled by the vm.max_map_count. 
For example:\n" + " > sudo sysctl -w vm.max_map_count=262144\n"); + } +#else + UNUSED(err); +#endif +} + // Commit/Decommit memory. // Usuelly commit is aligned liberal, while decommit is aligned conservative. // (but not for the reset version where we want commit to be conservative as well) @@ -644,6 +656,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #endif if (err != 0) { _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); + mi_mprotect_hint(err); } mi_assert_internal(err == 0); return (err == 0); @@ -762,6 +775,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { #endif if (err != 0) { _mi_warning_message("mprotect error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); + mi_mprotect_hint(err); } return (err == 0); } From 65f4f5144bef1a7145ac95a147ac01c7751a9310 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 13 Jan 2020 17:06:25 -0800 Subject: [PATCH 159/293] fix out-of-bounds error in huge OS page bitmap --- src/arena.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index b5d41a1a..7f1a1caf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -282,10 +282,10 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; } - _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); + _mi_verbose_message("reserved %zu gb huge pages (of the %zu gb requested)\n", pages_reserved, pages); size_t bcount = mi_block_count_of_size(hsize); - size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; + size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? 
if (arena == NULL) { @@ -300,11 +300,12 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec arena->is_zero_init = true; arena->is_committed = true; arena->search_idx = 0; - arena->blocks_dirty = &arena->blocks_inuse[bcount]; + arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap arena->blocks_committed = NULL; // the bitmaps are already zero initialized due to os_alloc // just claim leftover blocks if needed - size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; + ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; + mi_assert_internal(post >= 0); if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); From 941c55ee42e1b3a14b27a1df1ceab3ebfcbcf46d Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 14 Jan 2020 21:47:18 -0800 Subject: [PATCH 160/293] wip: first implementation of page free list on segments for effecient delayed page reset --- include/mimalloc-types.h | 2 + src/init.c | 7 +- src/options.c | 2 +- src/segment.c | 307 +++++++++++++++++++++++++++++++-------- 4 files changed, 251 insertions(+), 67 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index da9bfbac..51306808 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -417,6 +417,8 @@ typedef struct mi_os_tld_s { typedef struct mi_segments_tld_s { mi_segment_queue_t small_free; // queue of segments with free small pages mi_segment_queue_t medium_free; // queue of segments with free medium pages + mi_page_queue_t small_pages_free; // page queue of free small pages + mi_page_queue_t medium_pages_free; // page queue of free medium pages size_t count; // current number of segments; size_t peak_count; // peak number of segments size_t current_size; // current size of all segments diff --git a/src/init.c b/src/init.c index 3df854cf..085a5011 100644 --- a/src/init.c +++ b/src/init.c @@ -105,9 +105,12 @@ 
mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; static mi_tld_t tld_main = { 0, false, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments + { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, {NULL ,NULL, 0}, + 0, 0, 0, 0, 0, 0, NULL, + tld_main_stats, tld_main_os + }, // segments { 0, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { MI_STATS_NULL } // stats }; #if MI_INTPTR_SIZE==8 diff --git a/src/options.c b/src/options.c index 0d3bd393..77205713 100644 --- a/src/options.c +++ b/src/options.c @@ -70,7 +70,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index 97859fa9..fb5ea0ec 100644 --- a/src/segment.c +++ b/src/segment.c @@ -43,7 +43,7 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_ ----------------------------------------------------------- */ #if (MI_DEBUG>=3) -static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { +static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, const mi_segment_t* segment) { mi_assert_internal(segment != NULL); mi_segment_t* list = queue->first; while (list != NULL) { @@ -90,7 +90,7 @@ static mi_segment_queue_t* mi_segment_free_queue_of_kind(mi_page_kind_t kind, mi else return NULL; } -static mi_segment_queue_t* mi_segment_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { +static mi_segment_queue_t* mi_segment_free_queue(const mi_segment_t* segment, mi_segments_tld_t* tld) { return mi_segment_free_queue_of_kind(segment->page_kind, tld); } @@ -113,7 +113,7 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t ----------------------------------------------------------- */ #if (MI_DEBUG>=2) -static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { +static bool mi_segment_is_in_free_queue(const mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); if (in_queue) { @@ -123,7 +123,7 @@ static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t } #endif -static size_t mi_segment_page_size(mi_segment_t* segment) { +static size_t mi_segment_page_size(const mi_segment_t* segment) { if (segment->capacity > 1) { 
mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); return ((size_t)1 << segment->page_shift); @@ -134,15 +134,39 @@ static size_t mi_segment_page_size(mi_segment_t* segment) { } } +static mi_page_queue_t* mi_segment_page_free_queue(mi_page_kind_t kind, mi_segments_tld_t* tld) { + if (kind==MI_PAGE_SMALL) return &tld->small_pages_free; + else if (kind==MI_PAGE_MEDIUM) return &tld->medium_pages_free; + else return NULL; +} + + #if (MI_DEBUG>=3) -static bool mi_segment_is_valid(mi_segment_t* segment) { +static bool mi_segment_page_free_contains(mi_page_kind_t kind, const mi_page_t* page, mi_segments_tld_t* tld) { + const mi_page_queue_t* const pq = mi_segment_page_free_queue(kind, tld); + if (pq == NULL) return false; + mi_page_t* p = pq->first; + while (p != NULL) { + if (p == page) return true; + p = p->next; + } + return false; +} + +static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment != NULL); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->used <= segment->capacity); mi_assert_internal(segment->abandoned <= segment->used); size_t nfree = 0; for (size_t i = 0; i < segment->capacity; i++) { - if (!segment->pages[i].segment_in_use) nfree++; + const mi_page_t* const page = &segment->pages[i]; + if (!page->segment_in_use) { + nfree++; + } + else { + mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + } } mi_assert_internal(nfree + segment->used == segment->capacity); mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0 @@ -152,6 +176,20 @@ static bool mi_segment_is_valid(mi_segment_t* segment) { } #endif +static bool mi_segment_page_free_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) { + mi_page_kind_t kind = _mi_page_segment(page)->page_kind; + if (page->next != NULL || page->prev != NULL) { + mi_assert_internal(mi_segment_page_free_contains(kind, page, tld)); + return 
false; + } + if (kind > MI_PAGE_MEDIUM) return true; + // both next and prev are NULL, check for singleton list + const mi_page_queue_t* const pq = mi_segment_page_free_queue(kind, tld); + mi_assert_internal(pq!=NULL); + return (pq->first != page && pq->last != page); +} + + /* ----------------------------------------------------------- Guard pages ----------------------------------------------------------- */ @@ -232,6 +270,102 @@ static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, } +/* ----------------------------------------------------------- + The free page queue +----------------------------------------------------------- */ + +static void mi_segment_page_free_set_expire(mi_page_t* page) { + *((intptr_t*)(&page->heap)) = _mi_clock_now() + mi_option_get(mi_option_reset_delay); +} + +static mi_msecs_t mi_segment_page_free_get_expire(mi_page_t* page) { + return *((intptr_t*)(&page->heap)); +} + +static void mi_segment_page_free_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); + mi_assert_internal(!page->segment_in_use); + mi_assert_internal(_mi_page_segment(page) == segment); + mi_assert_internal(mi_segment_page_free_not_in_queue(page,tld)); + mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + mi_page_queue_t* pq = mi_segment_page_free_queue(segment->page_kind, tld); + // push on top + mi_segment_page_free_set_expire(page); + page->next = pq->first; + page->prev = NULL; + if (pq->first == NULL) { + mi_assert_internal(pq->last == NULL); + pq->first = pq->last = page; + } + else { + pq->first->prev = page; + pq->first = page; + } +} + +static void mi_segment_page_free_remove(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + if (segment->page_kind > MI_PAGE_MEDIUM) return; + if (mi_segment_page_free_not_in_queue(page,tld)) return; + + mi_page_queue_t* pq = mi_segment_page_free_queue(segment->page_kind, 
tld); + mi_assert_internal(pq!=NULL); + mi_assert_internal(_mi_page_segment(page)==segment); + mi_assert_internal(!page->segment_in_use); + mi_assert_internal(mi_segment_page_free_contains(segment->page_kind, page, tld)); + if (page->prev != NULL) page->prev->next = page->next; + if (page->next != NULL) page->next->prev = page->prev; + if (page == pq->last) pq->last = page->prev; + if (page == pq->first) pq->first = page->next; + page->next = page->prev = NULL; + page->heap = NULL; +} + +static void mi_segment_page_free_remove_all(mi_segment_t* segment, mi_segments_tld_t* tld) { + if (segment->page_kind > MI_PAGE_MEDIUM) return; + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (!page->segment_in_use) { + mi_segment_page_free_remove(segment, page, tld); + } + } +} + +static mi_page_t* mi_segment_page_free_top(mi_page_kind_t kind, mi_segments_tld_t* tld) { + mi_assert_internal(kind <= MI_PAGE_MEDIUM); + mi_page_queue_t* pq = mi_segment_page_free_queue(kind, tld); + return pq->first; +} + +static void mi_segment_page_free_reset_delayedx(mi_msecs_t now, mi_page_kind_t kind, mi_segments_tld_t* tld) { + mi_page_queue_t* pq = mi_segment_page_free_queue(kind, tld); + mi_assert_internal(pq != NULL); + mi_page_t* page = pq->last; + while (page != NULL && (now - mi_segment_page_free_get_expire(page)) >= 0) { + mi_page_t* const prev = page->prev; + mi_page_reset(_mi_page_segment(page), page, 0, tld); + page->heap = NULL; + page->prev = page->next = NULL; + page = prev; + } + pq->last = page; + if (page != NULL){ + page->next = NULL; + } + else { + pq->first = NULL; + } +} + +static void mi_segment_page_free_reset_delayed(mi_segments_tld_t* tld) { + if (!mi_option_is_enabled(mi_option_page_reset)) return; + mi_msecs_t now = _mi_clock_now(); + mi_segment_page_free_reset_delayedx(now, MI_PAGE_SMALL, tld); + mi_segment_page_free_reset_delayedx(now, MI_PAGE_MEDIUM, tld); +} + + + + /* 
----------------------------------------------------------- Segment size calculations ----------------------------------------------------------- */ @@ -407,6 +541,10 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache == NULL); + mi_assert_internal(tld->small_pages_free.first == NULL); + mi_assert_internal(tld->medium_pages_free.first == NULL); + mi_assert_internal(tld->small_free.first == NULL); + mi_assert_internal(tld->medium_free.first == NULL); } @@ -532,9 +670,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { - UNUSED(force); - //fprintf(stderr,"mimalloc: free segment at %p\n", (void*)segment); + UNUSED(force); mi_assert(segment != NULL); + mi_segment_page_free_remove_all(segment, tld); mi_segment_remove_from_free_queue(segment,tld); mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); @@ -561,37 +699,38 @@ static bool mi_segment_has_free(const mi_segment_t* segment) { return (segment->used < segment->capacity); } -static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(mi_segment_has_free(segment)); - mi_assert_expensive(mi_segment_is_valid(segment)); - for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use) { - // set in-use before doing unreset to prevent delayed reset - page->segment_in_use = true; - segment->used++; - if (!page->is_committed) { - mi_assert_internal(!segment->mem_is_fixed); - mi_assert_internal(!page->is_reset); - page->is_committed = true; - if (segment->page_kind < MI_PAGE_LARGE || !mi_option_is_enabled(mi_option_eager_page_commit)) { - size_t psize; - uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); - bool is_zero = false; - const size_t gsize = (MI_SECURE >= 2 ? 
_mi_os_page_size() : 0); - _mi_mem_commit(start, psize + gsize, &is_zero, tld->os); - if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } - if (is_zero) { page->is_zero_init = true; } - } - } - if (page->is_reset) { - mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? - } - return page; +static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(_mi_page_segment(page) == segment); + mi_assert_internal(!page->segment_in_use); + // set in-use before doing unreset to prevent delayed reset + mi_segment_page_free_remove(segment, page, tld); + page->segment_in_use = true; + segment->used++; + if (!page->is_committed) { + mi_assert_internal(!segment->mem_is_fixed); + mi_assert_internal(!page->is_reset); + page->is_committed = true; + if (segment->page_kind < MI_PAGE_LARGE + || !mi_option_is_enabled(mi_option_eager_page_commit)) { + size_t psize; + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + bool is_zero = false; + const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); + _mi_mem_commit(start, psize + gsize, &is_zero, tld->os); + if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } + if (is_zero) { page->is_zero_init = true; } } } - mi_assert(false); - return NULL; + if (page->is_reset) { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? 
+ } + mi_assert_internal(page->segment_in_use); + mi_assert_internal(segment->used <= segment->capacity); + if (segment->used == segment->capacity && segment->page_kind <= MI_PAGE_MEDIUM) { + // if no more free pages, remove from the queue + mi_assert_internal(!mi_segment_has_free(segment)); + mi_segment_remove_from_free_queue(segment, tld); + } } @@ -605,6 +744,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); + mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -619,19 +759,27 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg // reset the page memory to reduce memory pressure? // note: must come after setting `segment_in_use` to false but before block_size becomes 0 - mi_page_reset(segment, page, 0 /*used_size*/, tld); + //mi_page_reset(segment, page, 0 /*used_size*/, tld); - // zero the page data, but not the segment fields + // zero the page data, but not the segment fields and block_size (for page size calculations) + size_t block_size = page->block_size; ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + page->block_size = block_size; segment->used--; + + // add to the free page list for reuse/reset + if (segment->page_kind <= MI_PAGE_MEDIUM) { + mi_segment_page_free_add(segment, page, tld); + } } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); + mi_segment_page_free_reset_delayed(tld); // mark it as free now mi_segment_page_clear(segment, 
page, tld); @@ -690,10 +838,12 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); mi_assert_internal(segment->abandoned_next == NULL); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); // remove the segment from the free page queue if needed - mi_segment_remove_from_free_queue(segment,tld); + mi_segment_page_free_reset_delayed(tld); + mi_segment_page_free_remove_all(segment, tld); + mi_segment_remove_from_free_queue(segment, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); // all pages in the segment are abandoned; add it to the abandoned list @@ -708,7 +858,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); mi_assert_internal(segment->abandoned <= segment->used); @@ -755,7 +906,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen segment->abandoned_next = NULL; mi_segments_track_size((long)segment->segment_size,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); _mi_stat_decrease(&tld->stats->segments_abandoned,1); // add its abandoned pages to the current thread @@ -765,6 +916,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen if (page->segment_in_use) { mi_assert_internal(!page->is_reset); 
mi_assert_internal(page->is_committed); + mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); @@ -801,30 +953,55 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen Small page allocation ----------------------------------------------------------- */ -// Allocate a small page inside a segment. -// Requires that the page has free pages -static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { + +static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); - mi_page_t* page = mi_segment_find_free(segment, tld); - mi_assert_internal(page->segment_in_use); - mi_assert_internal(segment->used <= segment->capacity); - if (segment->used == segment->capacity) { - // if no more free pages, remove from the queue - mi_assert_internal(!mi_segment_has_free(segment)); - mi_segment_remove_from_free_queue(segment,tld); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (!page->segment_in_use) { + mi_segment_page_claim(segment, page, tld); + return page; + } } - return page; + mi_assert(false); + return NULL; } -static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_queue_t* free_queue = mi_segment_free_queue_of_kind(kind,tld); - if (mi_segment_queue_is_empty(free_queue)) { - mi_segment_t* segment = mi_segment_alloc(0,kind,page_shift,tld,os_tld); - if (segment == NULL) return NULL; - mi_segment_enqueue(free_queue, segment); + +// Allocate a page inside a segment. 
Requires that the page has free pages +static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_assert_internal(mi_segment_has_free(segment)); + return mi_segment_find_free(segment, tld); +} + +static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + mi_page_t* page = NULL; + mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld); + if (free_queue->first != NULL && free_queue->first->used < free_queue->first->capacity) { + // prefer to allocate from an available segment + // (to allow more chance of other segments to become completely freed) + page = mi_segment_page_alloc_in(free_queue->first, tld); } - mi_assert_internal(free_queue->first != NULL); - mi_page_t* page = mi_segment_page_alloc_in(free_queue->first,tld); + else { + // otherwise try to pop from the page free list + page = mi_segment_page_free_top(kind, tld); + if (page != NULL) { + mi_segment_page_claim(_mi_page_segment(page), page, tld); + } + else { + // if that failed, find an available segment the segment free queue again + if (mi_segment_queue_is_empty(free_queue)) { + // possibly allocate a fresh segment + mi_segment_t* segment = mi_segment_alloc(0, kind, page_shift, tld, os_tld); + if (segment == NULL) return NULL; // return NULL if out-of-memory + mi_segment_enqueue(free_queue, segment); + } + mi_assert_internal(free_queue->first != NULL); + page = mi_segment_page_alloc_in(free_queue->first, tld); + } + } + mi_assert_internal(page != NULL); #if MI_DEBUG>=2 _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0; #endif @@ -883,7 +1060,9 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ else { page = mi_segment_huge_page_alloc(block_size,tld,os_tld); } - mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page))); + mi_assert_expensive(page == NULL || 
mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); + mi_segment_page_free_reset_delayed(tld); + mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); return page; } From f92a2a72649b568a7d359f6b05f315c2919bc8c8 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 10:18:32 -0800 Subject: [PATCH 161/293] add argument pointer to the register output routine --- include/mimalloc.h | 4 ++-- src/options.c | 37 ++++++++++++++++++++++--------------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 94d9edfc..08af2eb9 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -111,8 +111,8 @@ mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free) mi_attr_noexcept; -typedef void (mi_output_fun)(const char* msg); -mi_decl_export void mi_register_output(mi_output_fun* out) mi_attr_noexcept; +typedef void (mi_output_fun)(const char* msg, void* arg); +mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept; mi_decl_export void mi_collect(bool force) mi_attr_noexcept; mi_decl_export int mi_version(void) mi_attr_noexcept; diff --git a/src/options.c b/src/options.c index 0d3bd393..ed1237d1 100644 --- a/src/options.c +++ b/src/options.c @@ -140,7 +140,8 @@ void mi_option_disable(mi_option_t option) { } -static void mi_out_stderr(const char* msg) { +static void mi_out_stderr(const char* msg, void* arg) { + UNUSED(arg); #ifdef _WIN32 // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. 
@@ -160,7 +161,8 @@ static void mi_out_stderr(const char* msg) { static char out_buf[MI_MAX_DELAY_OUTPUT+1]; static _Atomic(uintptr_t) out_len; -static void mi_out_buf(const char* msg) { +static void mi_out_buf(const char* msg, void* arg) { + UNUSED(arg); if (msg==NULL) return; if (mi_atomic_read_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; size_t n = strlen(msg); @@ -175,14 +177,14 @@ static void mi_out_buf(const char* msg) { memcpy(&out_buf[start], msg, n); } -static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { +static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { if (out==NULL) return; // claim (if `no_more_buf == true`, no more output will be added after this point) size_t count = mi_atomic_addu(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); // and output the current contents if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; - out(out_buf); + out(out_buf,arg); if (!no_more_buf) { out_buf[count] = '\n'; // if continue with the buffer, insert a newline } @@ -191,9 +193,9 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { // Once this module is loaded, switch to this routine // which outputs to stderr and the delayed output buffer. -static void mi_out_buf_stderr(const char* msg) { - mi_out_stderr(msg); - mi_out_buf(msg); +static void mi_out_buf_stderr(const char* msg, void* arg) { + mi_out_stderr(msg,arg); + mi_out_buf(msg,arg); } @@ -206,21 +208,25 @@ static void mi_out_buf_stderr(const char* msg) { // For now, don't register output from multiple threads. #pragma warning(suppress:4180) static mi_output_fun* volatile mi_out_default; // = NULL +static volatile _Atomic(void*) mi_out_arg; // = NULL -static mi_output_fun* mi_out_get_default(void) { +static mi_output_fun* mi_out_get_default(void** parg) { + if (parg != NULL) { *parg = mi_atomic_read_ptr(&mi_out_arg); } mi_output_fun* out = mi_out_default; return (out == NULL ? 
&mi_out_buf : out); } -void mi_register_output(mi_output_fun* out) mi_attr_noexcept { +void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer - if (out!=NULL) mi_out_buf_flush(out,true); // output all the delayed output now + mi_atomic_write_ptr(&mi_out_arg, arg); + if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now } // add stderr to the delayed output after the module is loaded static void mi_add_stderr_output() { - mi_out_buf_flush(&mi_out_stderr, false); // flush current contents to stderr - mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output + mi_assert_internal(mi_out_default == NULL); + mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr + mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output } // -------------------------------------------------------- @@ -234,10 +240,11 @@ static mi_decl_thread bool recurse = false; void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { if (recurse) return; - if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(); + void* arg = NULL; + if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(&arg); recurse = true; - if (prefix != NULL) out(prefix); - out(message); + if (prefix != NULL) out(prefix,arg); + out(message,arg); recurse = false; return; } From 0956a05bf6fc731e811a8696364caffd5b7e6da3 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 10:21:32 -0800 Subject: [PATCH 162/293] add argument pointer to the register deferred free callback --- include/mimalloc.h | 4 ++-- src/page.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 08af2eb9..1f6f1ef7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -108,8 +108,8 @@ mi_decl_export 
mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; -typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); -mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free) mi_attr_noexcept; +typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); +mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept; typedef void (mi_output_fun)(const char* msg, void* arg); mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept; diff --git a/src/page.c b/src/page.c index 7491bd61..6a6e09d6 100644 --- a/src/page.c +++ b/src/page.c @@ -764,18 +764,20 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { ----------------------------------------------------------- */ static mi_deferred_free_fun* volatile deferred_free = NULL; +static volatile _Atomic(void*) deferred_arg; // = NULL void _mi_deferred_free(mi_heap_t* heap, bool force) { heap->tld->heartbeat++; if (deferred_free != NULL && !heap->tld->recurse) { heap->tld->recurse = true; - deferred_free(force, heap->tld->heartbeat); + deferred_free(force, heap->tld->heartbeat, mi_atomic_read_ptr_relaxed(&deferred_arg)); heap->tld->recurse = false; } } -void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { +void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept { deferred_free = fn; + mi_atomic_write_ptr(&deferred_arg, arg); } From 783e3377f79ee82af43a0793910a9f2d01ac7863 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 10:53:54 -0800 Subject: [PATCH 163/293] add output argument to stat printing --- include/mimalloc-internal.h | 4 +- include/mimalloc.h | 5 +- src/init.c | 2 +- src/options.c | 27 +++--- src/stats.c | 160 
++++++++++++++++++------------------ test/main-override-static.c | 4 +- test/test-stress.c | 4 +- 7 files changed, 104 insertions(+), 102 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 3042e6f9..d5ce9f59 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -33,8 +33,8 @@ terms of the MIT license. A copy of the license can be found in the file // "options.c" -void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message); -void _mi_fprintf(mi_output_fun* out, const char* fmt, ...); +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); +void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...); void _mi_error_message(const char* fmt, ...); void _mi_warning_message(const char* fmt, ...); void _mi_verbose_message(const char* fmt, ...); diff --git a/include/mimalloc.h b/include/mimalloc.h index 1f6f1ef7..51d96609 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -118,12 +118,13 @@ mi_decl_export void mi_collect(bool force) mi_attr_noexcept; mi_decl_export int mi_version(void) mi_attr_noexcept; mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; -mi_decl_export void mi_stats_print(mi_output_fun* out) mi_attr_noexcept; +mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL +mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; mi_decl_export void mi_process_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_done(void) mi_attr_noexcept; -mi_decl_export void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept; +mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; // ------------------------------------------------------------------------------------- diff 
--git a/src/init.c b/src/init.c index 3df854cf..79e1e044 100644 --- a/src/init.c +++ b/src/init.c @@ -390,7 +390,7 @@ static void mi_process_load(void) { const char* msg = NULL; mi_allocator_init(&msg); if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { - _mi_fputs(NULL,NULL,msg); + _mi_fputs(NULL,NULL,NULL,msg); } } diff --git a/src/options.c b/src/options.c index ed1237d1..017b9d59 100644 --- a/src/options.c +++ b/src/options.c @@ -238,10 +238,11 @@ static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT // inside the C runtime causes another message. static mi_decl_thread bool recurse = false; -void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) { if (recurse) return; - void* arg = NULL; - if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(&arg); + if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr? + out = mi_out_get_default(&arg); + } recurse = true; if (prefix != NULL) out(prefix,arg); out(message,arg); @@ -251,21 +252,21 @@ void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { // Define our own limited `fprintf` that avoids memory allocation. // We do this using `snprintf` with a limited buffer. -static void mi_vfprintf( mi_output_fun* out, const char* prefix, const char* fmt, va_list args ) { +static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) { char buf[512]; if (fmt==NULL) return; if (recurse) return; recurse = true; vsnprintf(buf,sizeof(buf)-1,fmt,args); recurse = false; - _mi_fputs(out,prefix,buf); + _mi_fputs(out,arg,prefix,buf); } -void _mi_fprintf( mi_output_fun* out, const char* fmt, ... ) { +void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... 
) { va_list args; va_start(args,fmt); - mi_vfprintf(out,NULL,fmt,args); + mi_vfprintf(out,arg,NULL,fmt,args); va_end(args); } @@ -273,7 +274,7 @@ void _mi_trace_message(const char* fmt, ...) { if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher va_list args; va_start(args, fmt); - mi_vfprintf(NULL, "mimalloc: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args); va_end(args); } @@ -281,7 +282,7 @@ void _mi_verbose_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_verbose)) return; va_list args; va_start(args,fmt); - mi_vfprintf(NULL, "mimalloc: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args); va_end(args); } @@ -290,7 +291,7 @@ void _mi_error_message(const char* fmt, ...) { if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); - mi_vfprintf(NULL, "mimalloc: error: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); va_end(args); mi_assert(false); } @@ -300,14 +301,14 @@ void _mi_warning_message(const char* fmt, ...) { if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); - mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: warning: ", fmt, args); va_end(args); } #if MI_DEBUG void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { - _mi_fprintf(NULL,"mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); + _mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); abort(); } #endif @@ -315,7 +316,7 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, co mi_attr_noreturn void _mi_fatal_error(const char* fmt, ...) 
{ va_list args; va_start(args, fmt); - mi_vfprintf(NULL, "mimalloc: fatal: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: fatal: ", fmt, args); va_end(args); #if (MI_SECURE>=0) abort(); diff --git a/src/stats.c b/src/stats.c index cb6d8866..57599821 100644 --- a/src/stats.c +++ b/src/stats.c @@ -126,7 +126,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { // unit > 0 : size in binary bytes // unit == 0: count as decimal // unit < 0 : count in binary -static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const char* fmt) { +static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) { char buf[32]; int len = 32; const char* suffix = (unit <= 0 ? " " : "b"); @@ -147,75 +147,75 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const const long frac1 = (long)(tens%10); snprintf(buf, len, "%ld.%ld %s%s", whole, frac1, magnitude, suffix); } - _mi_fprintf(out, (fmt==NULL ? "%11s" : fmt), buf); + _mi_fprintf(out, arg, (fmt==NULL ? 
"%11s" : fmt), buf); } -static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out) { - mi_printf_amount(n,unit,out,NULL); +static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { + mi_printf_amount(n,unit,out,arg,NULL); } -static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out) { - if (unit==1) _mi_fprintf(out,"%11s"," "); - else mi_print_amount(n,0,out); +static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { + if (unit==1) _mi_fprintf(out, arg, "%11s"," "); + else mi_print_amount(n,0,out,arg); } -static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out ) { - _mi_fprintf(out,"%10s:", msg); +static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg,"%10s:", msg); if (unit>0) { - mi_print_amount(stat->peak, unit, out); - mi_print_amount(stat->allocated, unit, out); - mi_print_amount(stat->freed, unit, out); - mi_print_amount(unit, 1, out); - mi_print_count(stat->allocated, unit, out); + mi_print_amount(stat->peak, unit, out, arg); + mi_print_amount(stat->allocated, unit, out, arg); + mi_print_amount(stat->freed, unit, out, arg); + mi_print_amount(unit, 1, out, arg); + mi_print_count(stat->allocated, unit, out, arg); if (stat->allocated > stat->freed) - _mi_fprintf(out, " not all freed!\n"); + _mi_fprintf(out, arg, " not all freed!\n"); else - _mi_fprintf(out, " ok\n"); + _mi_fprintf(out, arg, " ok\n"); } else if (unit<0) { - mi_print_amount(stat->peak, -1, out); - mi_print_amount(stat->allocated, -1, out); - mi_print_amount(stat->freed, -1, out); + mi_print_amount(stat->peak, -1, out, arg); + mi_print_amount(stat->allocated, -1, out, arg); + mi_print_amount(stat->freed, -1, out, arg); if (unit==-1) { - _mi_fprintf(out, "%22s", ""); + _mi_fprintf(out, arg, "%22s", ""); } else { - mi_print_amount(-unit, 1, out); - mi_print_count((stat->allocated / 
-unit), 0, out); + mi_print_amount(-unit, 1, out, arg); + mi_print_count((stat->allocated / -unit), 0, out, arg); } if (stat->allocated > stat->freed) - _mi_fprintf(out, " not all freed!\n"); + _mi_fprintf(out, arg, " not all freed!\n"); else - _mi_fprintf(out, " ok\n"); + _mi_fprintf(out, arg, " ok\n"); } else { - mi_print_amount(stat->peak, 1, out); - mi_print_amount(stat->allocated, 1, out); - _mi_fprintf(out, "\n"); + mi_print_amount(stat->peak, 1, out, arg); + mi_print_amount(stat->allocated, 1, out, arg); + _mi_fprintf(out, arg, "\n"); } } -static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out ) { - _mi_fprintf(out, "%10s:", msg); - mi_print_amount(stat->total, -1, out); - _mi_fprintf(out, "\n"); +static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg, "%10s:", msg); + mi_print_amount(stat->total, -1, out, arg); + _mi_fprintf(out, arg, "\n"); } -static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out) { +static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg) { const int64_t avg_tens = (stat->count == 0 ? 
0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); const long avg_frac1 = (long)(avg_tens%10); - _mi_fprintf(out, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); + _mi_fprintf(out, arg, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); } -static void mi_print_header(mi_output_fun* out ) { - _mi_fprintf(out,"%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count "); +static void mi_print_header(mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count "); } #if MI_STAT>1 -static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out) { +static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out, void* arg) { bool found = false; char buf[64]; for (size_t i = 0; i <= max; i++) { @@ -224,14 +224,14 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin int64_t unit = _mi_bin_size((uint8_t)i); snprintf(buf, 64, "%s %3zu", fmt, i); mi_stat_add(all, &bins[i], unit); - mi_stat_print(&bins[i], buf, unit, out); + mi_stat_print(&bins[i], buf, unit, out, arg); } } //snprintf(buf, 64, "%s all", fmt); //mi_stat_print(all, buf, 1); if (found) { - _mi_fprintf(out, "\n"); - mi_print_header(out); + _mi_fprintf(out, arg, "\n"); + mi_print_header(out, arg); } } #endif @@ -239,40 +239,40 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); -static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out) mi_attr_noexcept { - mi_print_header(out); +static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out, void* arg) 
mi_attr_noexcept { + mi_print_header(out,arg); #if MI_STAT>1 mi_stat_count_t normal = { 0,0,0,0 }; - mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out); - mi_stat_print(&normal, "normal", 1, out); - mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out); - mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out); + mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out,arg); + mi_stat_print(&normal, "normal", 1, out, arg); + mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg); + mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out, arg); mi_stat_count_t total = { 0,0,0,0 }; mi_stat_add(&total, &normal, 1); mi_stat_add(&total, &stats->huge, 1); mi_stat_add(&total, &stats->giant, 1); - mi_stat_print(&total, "total", 1, out); - _mi_fprintf(out, "malloc requested: "); - mi_print_amount(stats->malloc.allocated, 1, out); - _mi_fprintf(out, "\n\n"); + mi_stat_print(&total, "total", 1, out, arg); + _mi_fprintf(out, arg, "malloc requested: "); + mi_print_amount(stats->malloc.allocated, 1, out, arg); + _mi_fprintf(out, arg, "\n\n"); #endif - mi_stat_print(&stats->reserved, "reserved", 1, out); - mi_stat_print(&stats->committed, "committed", 1, out); - mi_stat_print(&stats->reset, "reset", 1, out); - mi_stat_print(&stats->page_committed, "touched", 1, out); - mi_stat_print(&stats->segments, "segments", -1, out); - mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out); - mi_stat_print(&stats->segments_cache, "-cached", -1, out); - mi_stat_print(&stats->pages, "pages", -1, out); - mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out); - mi_stat_counter_print(&stats->pages_extended, "-extended", out); - 
mi_stat_counter_print(&stats->page_no_retire, "-noretire", out); - mi_stat_counter_print(&stats->mmap_calls, "mmaps", out); - mi_stat_counter_print(&stats->commit_calls, "commits", out); - mi_stat_print(&stats->threads, "threads", -1, out); - mi_stat_counter_print_avg(&stats->searches, "searches", out); - _mi_fprintf(out, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); - if (elapsed > 0) _mi_fprintf(out, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); + mi_stat_print(&stats->reserved, "reserved", 1, out, arg); + mi_stat_print(&stats->committed, "committed", 1, out, arg); + mi_stat_print(&stats->reset, "reset", 1, out, arg); + mi_stat_print(&stats->page_committed, "touched", 1, out, arg); + mi_stat_print(&stats->segments, "segments", -1, out, arg); + mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg); + mi_stat_print(&stats->segments_cache, "-cached", -1, out, arg); + mi_stat_print(&stats->pages, "pages", -1, out, arg); + mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out, arg); + mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg); + mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg); + mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); + mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); + mi_stat_print(&stats->threads, "threads", -1, out, arg); + mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); + _mi_fprintf(out, arg, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); + if (elapsed > 0) _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); mi_msecs_t user_time; mi_msecs_t sys_time; @@ -281,13 +281,13 @@ static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun size_t page_reclaim; size_t peak_commit; mi_process_info(&user_time, &sys_time, &peak_rss, &page_faults, &page_reclaim, &peak_commit); - _mi_fprintf(out,"%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, 
reclaims: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults, (unsigned long)page_reclaim ); - mi_printf_amount((int64_t)peak_rss, 1, out, "%s"); + _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, reclaims: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults, (unsigned long)page_reclaim ); + mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); if (peak_commit > 0) { - _mi_fprintf(out,", commit charge: "); - mi_printf_amount((int64_t)peak_commit, 1, out, "%s"); + _mi_fprintf(out, arg, ", commit charge: "); + mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s"); } - _mi_fprintf(out,"\n"); + _mi_fprintf(out, arg, "\n"); } static mi_msecs_t mi_time_start; // = 0 @@ -319,20 +319,20 @@ void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` mi_stats_merge_from(stats); } - -static void mi_stats_print_ex(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out) { - mi_stats_merge_from(stats); - _mi_stats_print(&_mi_stats_main, elapsed, out); +void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { + mi_msecs_t elapsed = _mi_clock_end(mi_time_start); + mi_stats_merge_from(mi_stats_get_default()); + _mi_stats_print(&_mi_stats_main, elapsed, out, arg); } -void mi_stats_print(mi_output_fun* out) mi_attr_noexcept { - mi_msecs_t elapsed = _mi_clock_end(mi_time_start); - mi_stats_print_ex(mi_stats_get_default(),elapsed,out); +void mi_stats_print(void* out) mi_attr_noexcept { + // for compatibility there is an `out` parameter (which can be `stdout` or `stderr`) + mi_stats_print_out((mi_output_fun*)out, NULL); } -void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept { +void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { mi_msecs_t elapsed = _mi_clock_end(mi_time_start); - _mi_stats_print(mi_stats_get_default(), elapsed, out); + 
_mi_stats_print(mi_stats_get_default(), elapsed, out, arg); } diff --git a/test/main-override-static.c b/test/main-override-static.c index b04bfeef..54a5ea66 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -13,7 +13,7 @@ static void corrupt_free(); int main() { mi_version(); - + // detect double frees and heap corruption // double_free1(); // double_free2(); @@ -106,4 +106,4 @@ static void corrupt_free() { for (int i = 0; i < 4096; i++) { malloc(SZ); } -} \ No newline at end of file +} diff --git a/test/test-stress.c b/test/test-stress.c index d295f741..42628d7c 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -119,7 +119,7 @@ static void free_items(void* p) { static void stress(intptr_t tid) { //bench_start_thread(); uintptr_t r = tid * 43; - const size_t max_item_shift = 5; // 128 + const size_t max_item_shift = 5; // 128 const size_t max_item_retained_shift = max_item_shift + 2; size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more size_t retain = allocs / 2; @@ -135,7 +135,7 @@ static void stress(intptr_t tid) { allocs--; if (data_top >= data_size) { data_size += 100000; - data = (void**)custom_realloc(data, data_size * sizeof(void*)); + data = (void**)custom_realloc(data, data_size * sizeof(void*)); } data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r); } From c9b5ac80b3a22a2456035651afcae1966ce6d3ee Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 12:00:44 -0800 Subject: [PATCH 164/293] update page reset queue to just do delayed page resets --- include/mimalloc-types.h | 3 +- src/init.c | 2 +- src/options.c | 2 +- src/segment.c | 192 ++++++++++++++++++--------------------- 4 files changed, 89 insertions(+), 110 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 51306808..5d5f6dfc 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -417,8 +417,7 @@ typedef struct mi_os_tld_s { typedef struct mi_segments_tld_s { 
mi_segment_queue_t small_free; // queue of segments with free small pages mi_segment_queue_t medium_free; // queue of segments with free medium pages - mi_page_queue_t small_pages_free; // page queue of free small pages - mi_page_queue_t medium_pages_free; // page queue of free medium pages + mi_page_queue_t pages_reset; // queue of freed pages that can be reset size_t count; // current number of segments; size_t peak_count; // peak number of segments size_t current_size; // current size of all segments diff --git a/src/init.c b/src/init.c index 085a5011..debc2517 100644 --- a/src/init.c +++ b/src/init.c @@ -105,7 +105,7 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; static mi_tld_t tld_main = { 0, false, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, {NULL ,NULL, 0}, + { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments diff --git a/src/options.c b/src/options.c index 77205713..17e3a836 100644 --- a/src/options.c +++ b/src/options.c @@ -67,7 +67,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds diff --git a/src/segment.c b/src/segment.c index fb5ea0ec..a2cd945c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -134,18 +134,10 @@ static size_t mi_segment_page_size(const mi_segment_t* segment) { } } -static 
mi_page_queue_t* mi_segment_page_free_queue(mi_page_kind_t kind, mi_segments_tld_t* tld) { - if (kind==MI_PAGE_SMALL) return &tld->small_pages_free; - else if (kind==MI_PAGE_MEDIUM) return &tld->medium_pages_free; - else return NULL; -} - #if (MI_DEBUG>=3) -static bool mi_segment_page_free_contains(mi_page_kind_t kind, const mi_page_t* page, mi_segments_tld_t* tld) { - const mi_page_queue_t* const pq = mi_segment_page_free_queue(kind, tld); - if (pq == NULL) return false; - mi_page_t* p = pq->first; +static bool mi_pages_reset_contains(const mi_page_t* page, mi_segments_tld_t* tld) { + mi_page_t* p = tld->pages_reset.first; while (p != NULL) { if (p == page) return true; p = p->next; @@ -164,8 +156,8 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* if (!page->segment_in_use) { nfree++; } - else { - mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + if (page->segment_in_use || page->is_reset) { + mi_assert_expensive(!mi_pages_reset_contains(page, tld)); } } mi_assert_internal(nfree + segment->used == segment->capacity); @@ -176,17 +168,15 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* } #endif -static bool mi_segment_page_free_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) { - mi_page_kind_t kind = _mi_page_segment(page)->page_kind; +static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) { if (page->next != NULL || page->prev != NULL) { - mi_assert_internal(mi_segment_page_free_contains(kind, page, tld)); + mi_assert_internal(mi_pages_reset_contains(page, tld)); return false; } - if (kind > MI_PAGE_MEDIUM) return true; - // both next and prev are NULL, check for singleton list - const mi_page_queue_t* const pq = mi_segment_page_free_queue(kind, tld); - mi_assert_internal(pq!=NULL); - return (pq->first != page && pq->last != page); + else { + // both next and prev are NULL, check for singleton list + return 
(tld->pages_reset.first != page && tld->pages_reset.last != page); + } } @@ -274,44 +264,57 @@ static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, The free page queue ----------------------------------------------------------- */ -static void mi_segment_page_free_set_expire(mi_page_t* page) { - *((intptr_t*)(&page->heap)) = _mi_clock_now() + mi_option_get(mi_option_reset_delay); +// we re-use the heap field for the expiration counter. Since this is a +// pointer, it can be 32-bit while the clock is always 64-bit. To guard +// against overflow, we use substraction to check for expiry which work +// as long as the reset delay is under (2^30 - 1) milliseconds (~12 days) +static void mi_page_reset_set_expire(mi_page_t* page) { + intptr_t expire = (intptr_t)(_mi_clock_now() + mi_option_get(mi_option_reset_delay)); + page->heap = (mi_heap_t*)expire; } -static mi_msecs_t mi_segment_page_free_get_expire(mi_page_t* page) { - return *((intptr_t*)(&page->heap)); +static bool mi_page_reset_is_expired(mi_page_t* page, mi_msecs_t now) { + intptr_t expire = (intptr_t)(page->heap); + return (((intptr_t)now - expire) >= 0); } -static void mi_segment_page_free_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { - mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); +static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(!page->segment_in_use); - mi_assert_internal(_mi_page_segment(page) == segment); - mi_assert_internal(mi_segment_page_free_not_in_queue(page,tld)); - mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); - mi_page_queue_t* pq = mi_segment_page_free_queue(segment->page_kind, tld); - // push on top - mi_segment_page_free_set_expire(page); - page->next = pq->first; - page->prev = NULL; - if (pq->first == NULL) { - mi_assert_internal(pq->last == NULL); - pq->first = pq->last = page; + 
mi_assert_internal(mi_page_not_in_queue(page,tld)); + mi_assert_expensive(!mi_pages_reset_contains(page, tld)); + mi_assert_internal(_mi_page_segment(page)==segment); + if (!mi_option_is_enabled(mi_option_page_reset)) return; + if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; + + if (mi_option_get(mi_option_reset_delay) == 0) { + // reset immediately? + mi_page_reset(segment, page, 0, tld); } else { - pq->first->prev = page; - pq->first = page; + // otherwise push on the delayed page reset queue + mi_page_queue_t* pq = &tld->pages_reset; + // push on top + mi_page_reset_set_expire(page); + page->next = pq->first; + page->prev = NULL; + if (pq->first == NULL) { + mi_assert_internal(pq->last == NULL); + pq->first = pq->last = page; + } + else { + pq->first->prev = page; + pq->first = page; + } } } -static void mi_segment_page_free_remove(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { - if (segment->page_kind > MI_PAGE_MEDIUM) return; - if (mi_segment_page_free_not_in_queue(page,tld)) return; +static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) { + if (mi_page_not_in_queue(page,tld)) return; - mi_page_queue_t* pq = mi_segment_page_free_queue(segment->page_kind, tld); + mi_page_queue_t* pq = &tld->pages_reset; mi_assert_internal(pq!=NULL); - mi_assert_internal(_mi_page_segment(page)==segment); mi_assert_internal(!page->segment_in_use); - mi_assert_internal(mi_segment_page_free_contains(segment->page_kind, page, tld)); + mi_assert_internal(mi_pages_reset_contains(page, tld)); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == pq->last) pq->last = page->prev; @@ -320,33 +323,33 @@ static void mi_segment_page_free_remove(mi_segment_t* segment, mi_page_t* page, page->heap = NULL; } -static void mi_segment_page_free_remove_all(mi_segment_t* segment, mi_segments_tld_t* tld) { - if (segment->page_kind > MI_PAGE_MEDIUM) return; +static 
void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, mi_segments_tld_t* tld) { + if (segment->mem_is_fixed) return; for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use) { - mi_segment_page_free_remove(segment, page, tld); + if (!page->segment_in_use && !page->is_reset) { + mi_pages_reset_remove(page, tld); } + else { + mi_assert_internal(mi_page_not_in_queue(page,tld)); + } } } -static mi_page_t* mi_segment_page_free_top(mi_page_kind_t kind, mi_segments_tld_t* tld) { - mi_assert_internal(kind <= MI_PAGE_MEDIUM); - mi_page_queue_t* pq = mi_segment_page_free_queue(kind, tld); - return pq->first; -} - -static void mi_segment_page_free_reset_delayedx(mi_msecs_t now, mi_page_kind_t kind, mi_segments_tld_t* tld) { - mi_page_queue_t* pq = mi_segment_page_free_queue(kind, tld); - mi_assert_internal(pq != NULL); +static void mi_reset_delayed(mi_segments_tld_t* tld) { + if (!mi_option_is_enabled(mi_option_page_reset)) return; + mi_msecs_t now = _mi_clock_now(); + mi_page_queue_t* pq = &tld->pages_reset; + // from oldest up to the first that has not expired yet mi_page_t* page = pq->last; - while (page != NULL && (now - mi_segment_page_free_get_expire(page)) >= 0) { - mi_page_t* const prev = page->prev; + while (page != NULL && mi_page_reset_is_expired(page,now)) { + mi_page_t* const prev = page->prev; // save previous field mi_page_reset(_mi_page_segment(page), page, 0, tld); page->heap = NULL; page->prev = page->next = NULL; page = prev; } + // discard the reset pages from the queue pq->last = page; if (page != NULL){ page->next = NULL; @@ -356,12 +359,6 @@ static void mi_segment_page_free_reset_delayedx(mi_msecs_t now, mi_page_kind_t k } } -static void mi_segment_page_free_reset_delayed(mi_segments_tld_t* tld) { - if (!mi_option_is_enabled(mi_option_page_reset)) return; - mi_msecs_t now = _mi_clock_now(); - mi_segment_page_free_reset_delayedx(now, MI_PAGE_SMALL, tld); - 
mi_segment_page_free_reset_delayedx(now, MI_PAGE_MEDIUM, tld); -} @@ -541,10 +538,8 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache == NULL); - mi_assert_internal(tld->small_pages_free.first == NULL); - mi_assert_internal(tld->medium_pages_free.first == NULL); - mi_assert_internal(tld->small_free.first == NULL); - mi_assert_internal(tld->medium_free.first == NULL); + mi_assert_internal(tld->pages_reset.first == NULL); + mi_assert_internal(tld->pages_reset.last == NULL); } @@ -672,7 +667,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { UNUSED(force); mi_assert(segment != NULL); - mi_segment_page_free_remove_all(segment, tld); + mi_pages_reset_remove_all_in_segment(segment, tld); mi_segment_remove_from_free_queue(segment,tld); mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); @@ -703,7 +698,7 @@ static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg mi_assert_internal(_mi_page_segment(page) == segment); mi_assert_internal(!page->segment_in_use); // set in-use before doing unreset to prevent delayed reset - mi_segment_page_free_remove(segment, page, tld); + mi_pages_reset_remove(page, tld); page->segment_in_use = true; segment->used++; if (!page->is_committed) { @@ -744,7 +739,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); - mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); + mi_assert_internal(mi_page_not_in_queue(page, tld)); size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -770,7 +765,7 @@ static void 
mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg // add to the free page list for reuse/reset if (segment->page_kind <= MI_PAGE_MEDIUM) { - mi_segment_page_free_add(segment, page, tld); + mi_pages_reset_add(segment, page, tld); } } @@ -779,7 +774,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment,tld)); - mi_segment_page_free_reset_delayed(tld); + mi_reset_delayed(tld); // mark it as free now mi_segment_page_clear(segment, page, tld); @@ -841,8 +836,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_expensive(mi_segment_is_valid(segment,tld)); // remove the segment from the free page queue if needed - mi_segment_page_free_reset_delayed(tld); - mi_segment_page_free_remove_all(segment, tld); + mi_reset_delayed(tld); + mi_pages_reset_remove_all_in_segment(segment, tld); // do not force reset on free pages in an abandoned segment, as it is already done in segment_thread_collect mi_segment_remove_from_free_queue(segment, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); @@ -858,7 +853,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + mi_assert_expensive(!mi_pages_reset_contains(page, tld)); mi_assert_expensive(mi_segment_is_valid(segment,tld)); segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); @@ -916,7 +911,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen if (page->segment_in_use) { mi_assert_internal(!page->is_reset); mi_assert_internal(page->is_committed); - 
mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); + mi_assert_internal(mi_page_not_in_queue(page, tld)); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); @@ -957,7 +952,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_assert_expensive(mi_segment_is_valid(segment, tld)); - for (size_t i = 0; i < segment->capacity; i++) { + for (size_t i = 0; i < segment->capacity; i++) { // TODO: use a bitmap instead of search? mi_page_t* page = &segment->pages[i]; if (!page->segment_in_use) { mi_segment_page_claim(segment, page, tld); @@ -968,7 +963,6 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* return NULL; } - // Allocate a page inside a segment. Requires that the page has free pages static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); @@ -976,33 +970,19 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl } static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_page_t* page = NULL; + // find an available segment the segment free queue mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld); - if (free_queue->first != NULL && free_queue->first->used < free_queue->first->capacity) { - // prefer to allocate from an available segment - // (to allow more chance of other segments to become completely freed) - page = mi_segment_page_alloc_in(free_queue->first, tld); - } - else { - // otherwise try to pop from the page free list - page = mi_segment_page_free_top(kind, tld); - if (page != NULL) { - mi_segment_page_claim(_mi_page_segment(page), page, tld); - } - else { - // if that 
failed, find an available segment the segment free queue again - if (mi_segment_queue_is_empty(free_queue)) { - // possibly allocate a fresh segment - mi_segment_t* segment = mi_segment_alloc(0, kind, page_shift, tld, os_tld); - if (segment == NULL) return NULL; // return NULL if out-of-memory - mi_segment_enqueue(free_queue, segment); - } - mi_assert_internal(free_queue->first != NULL); - page = mi_segment_page_alloc_in(free_queue->first, tld); - } + if (mi_segment_queue_is_empty(free_queue)) { + // possibly allocate a fresh segment + mi_segment_t* segment = mi_segment_alloc(0, kind, page_shift, tld, os_tld); + if (segment == NULL) return NULL; // return NULL if out-of-memory + mi_segment_enqueue(free_queue, segment); } + mi_assert_internal(free_queue->first != NULL); + mi_page_t* const page = mi_segment_page_alloc_in(free_queue->first, tld); mi_assert_internal(page != NULL); #if MI_DEBUG>=2 + // verify it is committed _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0; #endif return page; @@ -1062,7 +1042,7 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ } mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= block_size); - mi_segment_page_free_reset_delayed(tld); - mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); + mi_reset_delayed(tld); + mi_assert_internal(mi_page_not_in_queue(page, tld)); return page; } From 202246425b5c0f2f0dc68a6de9fc9fa6628d7822 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 12:16:01 -0800 Subject: [PATCH 165/293] bump version to 1.4 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- readme.md | 3 +++ test/CMakeLists.txt | 2 +- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index f64948d3..0a982bdf 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,5 +1,5 @@ set(mi_version_major 1) -set(mi_version_minor 3) +set(mi_version_minor 4) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 51d96609..fe09c7f2 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 130 // major + 2 digits minor +#define MI_MALLOC_VERSION 140 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes diff --git a/readme.md b/readme.md index 9d3974c9..0a096b5e 100644 --- a/readme.md +++ b/readme.md @@ -56,6 +56,9 @@ Enjoy! ### Releases + +* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and stronger +free list encoding in secure mode. * 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. * 2019-10-07, `v1.1.0`: stable release 1.1. 
* 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ed204888..4862c0ec 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 1.3 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +find_package(mimalloc 1.4 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") # overriding with a dynamic library From 0099707af905cddaab3d51a5639a1a2ae21ecf3c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 17:19:01 -0800 Subject: [PATCH 166/293] use delayed free for all pages; reduce size of the page structure for improved address calculation --- include/mimalloc-internal.h | 45 +++++++-- include/mimalloc-types.h | 57 ++++++----- src/alloc.c | 131 +++++++++++++------------ src/heap.c | 67 +++++++------ src/init.c | 11 +-- src/page-queue.c | 44 ++++----- src/page.c | 190 +++++++++++++++--------------------- src/segment.c | 19 ++-- 8 files changed, 296 insertions(+), 268 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d5ce9f59..a9391a40 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -308,7 +308,7 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const // Quick page start for initialized pages static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - const size_t bsize = page->block_size; + const size_t bsize = page->xblock_size; mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0); return _mi_segment_page_start(segment, page, bsize, page_size, NULL); } @@ -318,7 +318,40 @@ static inline mi_page_t* _mi_ptr_page(void* p) { return _mi_segment_page_of(_mi_ptr_segment(p), p); } +// Get the block size of a page (special cased for huge objects) +static inline 
size_t mi_page_block_size(const mi_page_t* page) { + const size_t bsize = page->xblock_size; + mi_assert_internal(bsize > 0); + if (mi_likely(bsize < MI_HUGE_BLOCK_SIZE)) { + return bsize; + } + else { + size_t psize; + _mi_segment_page_start(_mi_page_segment(page), page, bsize, &psize, NULL); + return psize; + } +} + // Thread free access +static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { + return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3); +} + +static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) { + return (mi_delayed_t)(mi_atomic_read_relaxed(&page->xthread_free) & 3); +} + +// Heap access +static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { + return (mi_heap_t*)(mi_atomic_read_relaxed(&page->xheap)); +} + +static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { + mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); + mi_atomic_write(&page->xheap,(uintptr_t)heap); +} + +// Thread free flag helpers static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) { return (mi_block_t*)(tf & ~0x03); } @@ -338,7 +371,7 @@ static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* // are all blocks in a page freed? static inline bool mi_page_all_free(const mi_page_t* page) { mi_assert_internal(page != NULL); - return (page->used - page->thread_freed == 0); + return (page->used == 0); } // are there immediately available blocks @@ -349,8 +382,8 @@ static inline bool mi_page_immediate_available(const mi_page_t* page) { // are there free blocks in this page? 
static inline bool mi_page_has_free(mi_page_t* page) { mi_assert_internal(page != NULL); - bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_tf_block(page->thread_free) != NULL)); - mi_assert_internal(hasfree || page->used - page->thread_freed == page->capacity); + bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_page_thread_free(page) != NULL)); + mi_assert_internal(hasfree || page->used == page->capacity); return hasfree; } @@ -364,7 +397,7 @@ static inline bool mi_page_all_used(mi_page_t* page) { static inline bool mi_page_mostly_used(const mi_page_t* page) { if (page==NULL) return true; uint16_t frac = page->reserved / 8U; - return (page->reserved - page->used + page->thread_freed <= frac); + return (page->reserved - page->used <= frac); } static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { @@ -467,7 +500,7 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { - _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); + _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); next = NULL; } return next; diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index da9bfbac..bf288d60 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -124,6 +124,9 @@ terms of the MIT license. A copy of the license can be found in the file #error "define more bins" #endif +// Used as a special value to encode block sizes in 32 bits. 
+#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) + // The free lists use encoded next fields // (Only actually encodes when MI_ENCODED_FREELIST is defined.) typedef uintptr_t mi_encoded_t; @@ -136,10 +139,10 @@ typedef struct mi_block_s { // The delayed flags are used for efficient multi-threaded free-ing typedef enum mi_delayed_e { - MI_NO_DELAYED_FREE = 0, - MI_USE_DELAYED_FREE = 1, - MI_DELAYED_FREEING = 2, - MI_NEVER_DELAYED_FREE = 3 + MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list + MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap + MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list + MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim } mi_delayed_t; @@ -167,14 +170,28 @@ typedef uintptr_t mi_thread_free_t; // implement a monotonic heartbeat. The `thread_free` list is needed for // avoiding atomic operations in the common case. // -// `used - thread_freed` == actual blocks that are in use (alive) -// `used - thread_freed + |free| + |local_free| == capacity` // -// note: we don't count `freed` (as |free|) instead of `used` to reduce -// the number of memory accesses in the `mi_page_all_free` function(s). -// note: the funny layout here is due to: -// - access is optimized for `mi_free` and `mi_page_alloc` -// - using `uint16_t` does not seem to slow things down +// `used - |thread_free|` == actual blocks that are in use (alive) +// `used - |thread_free| + |free| + |local_free| == capacity` +// +// We don't count `freed` (as |free|) but use `used` to reduce +// the number of memory accesses in the `mi_page_all_free` function(s). 
+// +// Notes: +// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`) +// - Using `uint16_t` does not seem to slow things down +// - The size is 8 words on 64-bit which helps the page index calculations +// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10 +// and 12 are still good for address calculation) +// - To limit the structure size, the `xblock_size` is 32-bits only; for +// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size +// - `thread_free` uses the bottom bits as delayed-free flags to optimize +// concurrent frees where only the first concurrent free adds to the owning +// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`). +// The invariant is that no-delayed-free is only set if there is +// at least one block that will be added, or has already been added, to +// the owning heap `thread_delayed_free` list. This guarantees that pages +// will be freed correctly even if only other threads free blocks. 
typedef struct mi_page_s { // "owned" by the segment uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]` @@ -194,23 +211,15 @@ typedef struct mi_page_s { #ifdef MI_ENCODE_FREELIST uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif - size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + uint32_t xblock_size; // size available in each block (always `>0`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) - volatile _Atomic(uintptr_t) thread_freed; // at least this number of blocks are in `thread_free` - volatile _Atomic(mi_thread_free_t) thread_free; // list of deferred free blocks freed by other threads - - // less accessed info - size_t block_size; // size available in each block (always `>0`) - mi_heap_t* heap; // the owning heap + volatile _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads + volatile _Atomic(uintptr_t) xheap; + struct mi_page_s* next; // next page owned by this thread with the same `block_size` struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` - - // improve page index calculation - // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds two words - #if (MI_INTPTR_SIZE==4) - void* padding[1]; // 12/14 words on 32-bit plain - #endif } mi_page_t; diff --git a/src/alloc.c b/src/alloc.c index bd81aba0..621fb0db 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -22,7 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. 
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { - mi_assert_internal(page->block_size==0||page->block_size >= size); + mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* block = page->free; if (mi_unlikely(block == NULL)) { return _mi_malloc_generic(heap, size); // slow path @@ -94,16 +94,16 @@ void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) UNUSED(size); mi_assert_internal(p != NULL); - mi_assert_internal(size > 0 && page->block_size >= size); + mi_assert_internal(size > 0 && mi_page_block_size(page) >= size); mi_assert_internal(_mi_ptr_page(p)==page); if (page->is_zero) { // already zero initialized memory? ((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p,page->block_size)); + mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page))); } else { // otherwise memset - memset(p, 0, page->block_size); + memset(p, 0, mi_page_block_size(page)); } } @@ -141,13 +141,12 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or NULL). 
- // Walk the free lists to verify positively if it is already freed - mi_thread_free_t tf = (mi_thread_free_t)mi_atomic_read_relaxed(mi_atomic_cast(uintptr_t, &page->thread_free)); + // Walk the free lists to verify positively if it is already freed if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, mi_tf_block(tf), block)) + mi_list_contains(page, mi_page_thread_free(page), block)) { - _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); + _mi_fatal_error("double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); return true; } return false; @@ -177,44 +176,50 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block // Free // ------------------------------------------------------ +// free huge block from another thread +static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { + // huge page segments are always abandoned and can be freed immediately + mi_assert_internal(segment->page_kind==MI_PAGE_HUGE); + mi_assert_internal(segment == _mi_page_segment(page)); + mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); + + // claim it and free + mi_heap_t* heap = mi_get_default_heap(); + // paranoia: if this it the last reference, the cas should always succeed + if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) { + mi_block_set_next(page, block, page->free); + page->free = block; + page->used--; + page->is_zero = false; + mi_assert(page->used == 0); + mi_tld_t* tld = heap->tld; + const size_t bsize = mi_page_block_size(page); + if (bsize > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&tld->stats.giant, bsize); + } + else { + _mi_stat_decrease(&tld->stats.huge, bsize); + } + _mi_segment_page_free(page, true, &tld->segments); + } +} + // multi-threaded free static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, 
mi_block_t* block) { - mi_thread_free_t tfree; - mi_thread_free_t tfreex; - bool use_delayed; - + // huge page segments are always abandoned and can be freed immediately mi_segment_t* segment = _mi_page_segment(page); if (segment->page_kind==MI_PAGE_HUGE) { - // huge page segments are always abandoned and can be freed immediately - mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); - mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&segment->abandoned_next))==NULL); - // claim it and free - mi_heap_t* heap = mi_get_default_heap(); - // paranoia: if this it the last reference, the cas should always succeed - if (mi_atomic_cas_strong(&segment->thread_id,heap->thread_id,0)) { - mi_block_set_next(page, block, page->free); - page->free = block; - page->used--; - page->is_zero = false; - mi_assert(page->used == 0); - mi_tld_t* tld = heap->tld; - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&tld->stats.giant, page->block_size); - } - else { - _mi_stat_decrease(&tld->stats.huge, page->block_size); - } - _mi_segment_page_free(page,true,&tld->segments); - } + mi_free_huge_block_mt(segment, page, block); return; } + mi_thread_free_t tfree; + mi_thread_free_t tfreex; + bool use_delayed; do { - tfree = page->thread_free; - use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE || - (mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == mi_atomic_read_relaxed(&page->thread_freed)+1) // data-race but ok, just optimizes early release of the page - ); + tfree = mi_atomic_read_relaxed(&page->xthread_free); + use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); if (mi_unlikely(use_delayed)) { // unlikely: this only happens on the first concurrent free in a page that is in the full list tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); @@ -224,15 +229,11 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_set_next(page, block, mi_tf_block(tfree)); tfreex = 
mi_tf_set_block(tfree,block); } - } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); - if (mi_likely(!use_delayed)) { - // increment the thread free count and return - mi_atomic_increment(&page->thread_freed); - } - else { + if (mi_unlikely(use_delayed)) { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* heap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); + mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) @@ -245,10 +246,10 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // and reset the MI_DELAYED_FREEING flag do { - tfreex = tfree = page->thread_free; - mi_assert_internal(mi_tf_delayed(tfree) == MI_NEVER_DELAYED_FREE || mi_tf_delayed(tfree) == MI_DELAYED_FREEING); - if (mi_tf_delayed(tfree) != MI_NEVER_DELAYED_FREE) tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); - } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + tfreex = tfree = mi_atomic_read_relaxed(&page->xthread_free); + mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); + tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); } } @@ -257,7 +258,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) { #if (MI_DEBUG) - memset(block, MI_DEBUG_FREED, page->block_size); + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); #endif // and push it on the free list @@ -284,7 +285,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block 
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { mi_assert_internal(page!=NULL && p!=NULL); size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); - size_t adjust = (diff % page->block_size); + size_t adjust = (diff % mi_page_block_size(page)); return (mi_block_t*)((uintptr_t)p - adjust); } @@ -329,8 +330,8 @@ void mi_free(void* p) mi_attr_noexcept #if (MI_STAT>1) mi_heap_t* heap = mi_heap_get_default(); mi_heap_stat_decrease(heap, malloc, mi_usable_size(p)); - if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, normal[_mi_bin(page->block_size)], 1); + if (page->xblock_size <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal[_mi_bin(page->xblock_size)], 1); } // huge page stat is accounted for in `_mi_page_retire` #endif @@ -342,7 +343,9 @@ void mi_free(void* p) mi_attr_noexcept mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; - if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); } + if (mi_unlikely(mi_page_all_free(page))) { + _mi_page_retire(page); + } } else { // non-local, aligned blocks, or a full page; use the more generic path @@ -356,13 +359,19 @@ bool _mi_free_delayed_block(mi_block_t* block) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(_mi_thread_id() == segment->thread_id); mi_page_t* page = _mi_segment_page_of(segment, block); - if (mi_tf_delayed(page->thread_free) == MI_DELAYED_FREEING) { - // we might already start delayed freeing while another thread has not yet - // reset the delayed_freeing flag; in that case don't free it quite yet if - // this is the last block remaining. - if (page->used - page->thread_freed == 1) return false; - } - _mi_free_block(page,true,block); + + // Clear the no-delayed flag so delayed freeing is used again for this page. 
+ // This must be done before collecting the free lists on this page -- otherwise + // some blocks may end up in the page `thread_free` list with no blocks in the + // heap `thread_delayed_free` list which may cause the page to be never freed! + // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`) + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */); + + // collect all other non-local frees to ensure up-to-date `used` count + _mi_page_free_collect(page, false); + + // and free the block (possibly freeing the page as well since used is updated) + _mi_free_block(page, true, block); return true; } @@ -371,7 +380,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept { if (p==NULL) return 0; const mi_segment_t* segment = _mi_ptr_segment(p); const mi_page_t* page = _mi_segment_page_of(segment,p); - size_t size = page->block_size; + size_t size = mi_page_block_size(page); if (mi_unlikely(mi_page_has_aligned(page))) { ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); diff --git a/src/heap.c b/src/heap.c index 4a589e5c..9f2a4457 100644 --- a/src/heap.c +++ b/src/heap.c @@ -34,7 +34,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void mi_page_t* page = pq->first; while(page != NULL) { mi_page_t* next = page->next; // save next in case the page gets removed from the queue - mi_assert_internal(page->heap == heap); + mi_assert_internal(mi_page_heap(page) == heap); count++; if (!fn(heap, pq, page, arg1, arg2)) return false; page = next; // and continue @@ -50,7 +50,7 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ UNUSED(arg1); UNUSED(arg2); UNUSED(pq); - mi_assert_internal(page->heap == heap); + mi_assert_internal(mi_page_heap(page) == heap); mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(segment->thread_id == 
heap->thread_id); mi_assert_expensive(_mi_page_is_valid(page)); @@ -118,13 +118,18 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // this may free some segments (but also take ownership of abandoned pages) _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments); } - #if MI_DEBUG - else if (collect == ABANDON && _mi_is_main_thread() && mi_heap_is_backing(heap)) { + else if ( + #ifdef NDEBUG + collect == FORCE + #else + collect >= FORCE + #endif + && _mi_is_main_thread() && mi_heap_is_backing(heap)) + { // the main thread is abandoned, try to free all abandoned segments. // if all memory is freed by now, all segments should be freed. _mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); } - #endif } // if abandoning, mark all pages to no longer add to delayed_free @@ -245,25 +250,27 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); // stats - if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&heap->tld->stats.giant,page->block_size); + const size_t bsize = mi_page_block_size(page); + if (bsize > MI_LARGE_OBJ_SIZE_MAX) { + if (bsize > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&heap->tld->stats.giant, bsize); } else { - _mi_stat_decrease(&heap->tld->stats.huge, page->block_size); + _mi_stat_decrease(&heap->tld->stats.huge, bsize); } } - #if (MI_STAT>1) - size_t inuse = page->used - page->thread_freed; - if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap,normal[_mi_bin(page->block_size)], inuse); +#if (MI_STAT>1) + _mi_page_free_collect(page, false); // update used count + const size_t inuse = page->used; + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal[_mi_bin(bsize)], inuse); } - mi_heap_stat_decrease(heap,malloc, page->block_size * inuse); // todo: off for aligned blocks... 
- #endif + mi_heap_stat_decrease(heap, malloc, bsize * inuse); // todo: off for aligned blocks... +#endif - // pretend it is all free now - mi_assert_internal(page->thread_freed<=0xFFFF); - page->used = (uint16_t)page->thread_freed; + /// pretend it is all free now + mi_assert_internal(mi_page_thread_free(page) == NULL); + page->used = 0; // and free the page _mi_segment_page_free(page,false /* no force? */, &heap->tld->segments); @@ -374,7 +381,7 @@ static mi_heap_t* mi_heap_of_block(const void* p) { bool valid = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(valid); if (mi_unlikely(!valid)) return NULL; - return _mi_segment_page_of(segment,p)->heap; + return mi_page_heap(_mi_segment_page_of(segment,p)); } bool mi_heap_contains_block(mi_heap_t* heap, const void* p) { @@ -390,7 +397,7 @@ static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa bool* found = (bool*)vfound; mi_segment_t* segment = _mi_page_segment(page); void* start = _mi_page_start(segment, page, NULL); - void* end = (uint8_t*)start + (page->capacity * page->block_size); + void* end = (uint8_t*)start + (page->capacity * mi_page_block_size(page)); *found = (p >= start && p < end); return (!*found); // continue if not found } @@ -432,13 +439,14 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v mi_assert_internal(page->local_free == NULL); if (page->used == 0) return true; + const size_t bsize = mi_page_block_size(page); size_t psize; uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); if (page->capacity == 1) { // optimize page with one block mi_assert_internal(page->used == 1 && page->free == NULL); - return visitor(page->heap, area, pstart, page->block_size, arg); + return visitor(mi_page_heap(page), area, pstart, bsize, arg); } // create a bitmap of free blocks. 
@@ -451,8 +459,8 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v free_count++; mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); size_t offset = (uint8_t*)block - pstart; - mi_assert_internal(offset % page->block_size == 0); - size_t blockidx = offset / page->block_size; // Todo: avoid division? + mi_assert_internal(offset % bsize == 0); + size_t blockidx = offset / bsize; // Todo: avoid division? mi_assert_internal( blockidx < MI_MAX_BLOCKS); size_t bitidx = (blockidx / sizeof(uintptr_t)); size_t bit = blockidx - (bitidx * sizeof(uintptr_t)); @@ -471,8 +479,8 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v } else if ((m & ((uintptr_t)1 << bit)) == 0) { used_count++; - uint8_t* block = pstart + (i * page->block_size); - if (!visitor(page->heap, area, block, page->block_size, arg)) return false; + uint8_t* block = pstart + (i * bsize); + if (!visitor(mi_page_heap(page), area, block, bsize, arg)) return false; } } mi_assert_internal(page->used == used_count); @@ -487,12 +495,13 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa UNUSED(pq); mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; mi_heap_area_ex_t xarea; + const size_t bsize = mi_page_block_size(page); xarea.page = page; - xarea.area.reserved = page->reserved * page->block_size; - xarea.area.committed = page->capacity * page->block_size; + xarea.area.reserved = page->reserved * bsize; + xarea.area.committed = page->capacity * bsize; xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); - xarea.area.used = page->used - page->thread_freed; // race is ok - xarea.area.block_size = page->block_size; + xarea.area.used = page->used; + xarea.area.block_size = bsize; return fun(heap, &xarea, arg); } diff --git a/src/init.c b/src/init.c index 79e1e044..d81d7459 100644 --- a/src/init.c +++ b/src/init.c @@ -23,12 +23,11 @@ const mi_page_t _mi_page_empty 
= { { 0, 0 }, #endif 0, // used - NULL, - ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), - 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==4) - , { NULL } // padding - #endif + 0, // xblock_size + NULL, // local_free + ATOMIC_VAR_INIT(0), // xthread_free + ATOMIC_VAR_INIT(0), // xheap + NULL, NULL }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) diff --git a/src/page-queue.c b/src/page-queue.c index 95443a69..68e2aaa4 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -178,20 +178,20 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* #endif static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size)); - mi_heap_t* heap = page->heap; + uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size)); + mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(bin >= MI_BIN_HUGE || page->block_size == pq->block_size); + mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size); mi_assert_expensive(mi_page_queue_contains(pq, page)); return pq; } static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size)); + uint8_t bin = (mi_page_is_in_full(page) ? 
MI_BIN_FULL : _mi_bin(page->xblock_size)); mi_assert_internal(bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(mi_page_is_in_full(page) || page->block_size == pq->block_size); + mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size); return pq; } @@ -246,35 +246,35 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(queue, page)); - mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == queue->last) queue->last = page->prev; if (page == queue->first) { queue->first = page->next; // update first - mi_heap_t* heap = page->heap; mi_assert_internal(mi_heap_contains_queue(heap, queue)); mi_heap_queue_first_update(heap,queue); } - page->heap->page_count--; + heap->page_count--; page->next = NULL; page->prev = NULL; - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); mi_page_set_in_full(page,false); } static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { - mi_assert_internal(page->heap == NULL); + mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(!mi_page_queue_contains(queue, page)); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_assert_internal(page->block_size == queue->block_size || - 
(page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || + mi_assert_internal(page->xblock_size == queue->block_size || + (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); page->next = queue->first; page->prev = NULL; if (queue->first != NULL) { @@ -296,19 +296,19 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(from, page)); mi_assert_expensive(!mi_page_queue_contains(to, page)); - mi_assert_internal((page->block_size == to->block_size && page->block_size == from->block_size) || - (page->block_size == to->block_size && mi_page_queue_is_full(from)) || - (page->block_size == from->block_size && mi_page_queue_is_full(to)) || - (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || - (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); + mi_assert_internal((page->xblock_size == to->block_size && page->xblock_size == from->block_size) || + (page->xblock_size == to->block_size && mi_page_queue_is_full(from)) || + (page->xblock_size == from->block_size && mi_page_queue_is_full(to)) || + (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || + (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); + mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == from->last) from->last = page->prev; if (page == from->first) { from->first = page->next; // update first - mi_heap_t* heap = page->heap; mi_assert_internal(mi_heap_contains_queue(heap, from)); mi_heap_queue_first_update(heap, from); 
} @@ -316,14 +316,14 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro page->prev = to->last; page->next = NULL; if (to->last != NULL) { - mi_assert_internal(page->heap == to->last->heap); + mi_assert_internal(heap == mi_page_heap(to->last)); to->last->next = page; to->last = page; } else { to->first = page; to->last = page; - mi_heap_queue_first_update(page->heap, to); + mi_heap_queue_first_update(heap, to); } mi_page_set_in_full(page, mi_page_queue_is_full(to)); @@ -338,7 +338,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue // set append pages to new heap and count size_t count = 0; for (mi_page_t* page = append->first; page != NULL; page = page->next) { - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + mi_page_set_heap(page,heap); count++; } diff --git a/src/page.c b/src/page.c index 6a6e09d6..40aec0c6 100644 --- a/src/page.c +++ b/src/page.c @@ -29,10 +29,11 @@ terms of the MIT license. A copy of the license can be found in the file ----------------------------------------------------------- */ // Index a block in a page -static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t i) { +static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) { + UNUSED(page); mi_assert_internal(page != NULL); mi_assert_internal(i <= page->reserved); - return (mi_block_t*)((uint8_t*)page_start + (i * page->block_size)); + return (mi_block_t*)((uint8_t*)page_start + (i * block_size)); } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); @@ -69,13 +70,14 @@ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { } static bool mi_page_is_valid_init(mi_page_t* page) { - mi_assert_internal(page->block_size > 0); + mi_assert_internal(page->xblock_size > 0); mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); + 
const size_t bsize = mi_page_block_size(page); mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL,NULL)); + mi_assert_internal(start == _mi_segment_page_start(segment,page,bsize,NULL,NULL)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -89,10 +91,10 @@ static bool mi_page_is_valid_init(mi_page_t* page) { } #endif - mi_block_t* tfree = mi_tf_block(page->thread_free); + mi_block_t* tfree = mi_page_thread_free(page); mi_assert_internal(mi_page_list_is_valid(page, tfree)); - size_t tfree_count = mi_page_list_count(page, tfree); - mi_assert_internal(tfree_count <= page->thread_freed + 1); + //size_t tfree_count = mi_page_list_count(page, tfree); + //mi_assert_internal(tfree_count <= page->thread_freed + 1); size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free); mi_assert_internal(page->used + free_count == page->capacity); @@ -105,14 +107,14 @@ bool _mi_page_is_valid(mi_page_t* page) { #if MI_SECURE mi_assert_internal(page->key != 0); #endif - if (page->heap!=NULL) { + if (mi_page_heap(page)!=NULL) { mi_segment_t* segment = _mi_page_segment(page); - mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id || segment->thread_id==0); + mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0); if (segment->page_kind != MI_PAGE_HUGE) { mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); - mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); - mi_assert_internal(mi_heap_contains_queue(page->heap,pq)); + mi_assert_internal(pq->block_size==mi_page_block_size(page) || 
mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); + mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq)); } } return true; @@ -124,20 +126,20 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid mi_thread_free_t tfreex; mi_delayed_t old_delay; do { - tfree = mi_atomic_read_relaxed(&page->thread_free); + tfree = mi_atomic_read(&page->xthread_free); tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { - mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. + // mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. + tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail } else if (delay == old_delay) { break; // avoid atomic operation if already equal } else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) { - break; // leave never set + break; // leave never-delayed flag set } - } while ((old_delay == MI_DELAYED_FREEING) || - !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t, &page->thread_free), tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); } /* ----------------------------------------------------------- @@ -154,17 +156,17 @@ static void _mi_page_thread_free_collect(mi_page_t* page) mi_thread_free_t tfree; mi_thread_free_t tfreex; do { - tfree = page->thread_free; + tfree = mi_atomic_read_relaxed(&page->xthread_free); head = mi_tf_block(tfree); tfreex = mi_tf_set_block(tfree,NULL); - } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); // return if the list is empty if (head == NULL) return; // find the tail -- also to get a proper count (without data races) - uintptr_t max_count = page->capacity; // cannot collect more than capacity - uintptr_t count = 1; + uint32_t max_count = page->capacity; // 
cannot collect more than capacity + uint32_t count = 1; mi_block_t* tail = head; mi_block_t* next; while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) { @@ -182,7 +184,6 @@ static void _mi_page_thread_free_collect(mi_page_t* page) page->local_free = head; // update counts now - mi_atomic_subu(&page->thread_freed, count); page->used -= count; } @@ -190,7 +191,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { mi_assert_internal(page!=NULL); // collect the thread free list - if (force || mi_tf_block(page->thread_free) != NULL) { // quick test to avoid an atomic operation + if (force || mi_page_thread_free(page) != NULL) { // quick test to avoid an atomic operation _mi_page_thread_free_collect(page); } @@ -228,15 +229,16 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // called from segments when reclaiming abandoned pages void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); - mi_assert_internal(page->heap == NULL); + mi_assert_internal(mi_page_heap(page) == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(!page->is_reset); - mi_assert_internal(mi_tf_delayed(page->thread_free) == MI_NEVER_DELAYED_FREE); - _mi_page_free_collect(page,false); - mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); + mi_assert_internal(mi_page_thread_free_flag(page) == MI_NEVER_DELAYED_FREE); + mi_page_set_heap(page, heap); + mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); mi_page_queue_push(heap, pq, page); - mi_assert_internal(page->heap != NULL); - _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, true); // override never (after push so heap is set) + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) + // _mi_page_free_collect(page,false); // no need, as it is just done before reclaim + mi_assert_internal(mi_page_heap(page)!= NULL); 
mi_assert_expensive(_mi_page_is_valid(page)); } @@ -270,8 +272,8 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { // otherwise allocate the page page = mi_page_fresh_alloc(heap, pq, pq->block_size); if (page==NULL) return NULL; - mi_assert_internal(pq->block_size==page->block_size); - mi_assert_internal(pq==mi_page_queue(heap,page->block_size)); + mi_assert_internal(pq->block_size==mi_page_block_size(page)); + mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); return page; } @@ -312,11 +314,9 @@ void _mi_page_unfull(mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(mi_page_is_in_full(page)); - - _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, false); if (!mi_page_is_in_full(page)) return; - mi_heap_t* heap = page->heap; + mi_heap_t* heap = mi_page_heap(page); mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL]; mi_page_set_in_full(page, false); // to get the right queue mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); @@ -329,10 +329,8 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(!mi_page_immediate_available(page)); mi_assert_internal(!mi_page_is_in_full(page)); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false); if (mi_page_is_in_full(page)) return; - - mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page); + mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page); _mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set } @@ -345,18 +343,17 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); - mi_assert_internal(page->heap != NULL); + mi_assert_internal(mi_page_heap(page) != NULL); -#if MI_DEBUG > 1 - mi_heap_t* pheap = 
(mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); -#endif + mi_heap_t* pheap = mi_page_heap(page); // remove from our page list - mi_segments_tld_t* segments_tld = &page->heap->tld->segments; + mi_segments_tld_t* segments_tld = &pheap->tld->segments; mi_page_queue_remove(pq, page); // page is no longer associated with our heap - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_page_set_heap(page, NULL); #if MI_DEBUG>1 // check there are no references left.. @@ -366,7 +363,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { #endif // and abandon it - mi_assert_internal(page->heap == NULL); + mi_assert_internal(mi_page_heap(page) == NULL); _mi_segment_page_abandon(page,segments_tld); } @@ -377,33 +374,18 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(mi_page_all_free(page)); - #if MI_DEBUG>1 - // check if we can safely free - mi_thread_free_t free = mi_tf_set_delayed(page->thread_free,MI_NEVER_DELAYED_FREE); - free = mi_atomic_exchange(&page->thread_free, free); - mi_assert_internal(mi_tf_delayed(free) != MI_DELAYED_FREEING); - #endif + mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING); + // no more aligned blocks in here mi_page_set_has_aligned(page, false); - // account for huge pages here - // (note: no longer necessary as huge pages are always abandoned) - if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&page->heap->tld->stats.giant, page->block_size); - } - else { - _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); - } - } - // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) - mi_segments_tld_t* segments_tld = 
&page->heap->tld->segments; + mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments; mi_page_queue_remove(pq, page); // and free it - mi_assert_internal(page->heap == NULL); + mi_page_set_heap(page,NULL); _mi_segment_page_free(page, force, segments_tld); } @@ -427,7 +409,7 @@ void _mi_page_retire(mi_page_t* page) { // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); - if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { + if (mi_likely(page->xblock_size <= MI_SMALL_SIZE_MAX && !mi_page_is_in_full(page))) { if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); page->retire_expire = 4; @@ -469,15 +451,15 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { #define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT) #define MI_MIN_SLICES (2) -static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t extend, mi_stats_t* const stats) { +static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { UNUSED(stats); #if (MI_SECURE<=2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); #endif mi_assert_internal(page->capacity + extend <= page->reserved); + mi_assert_internal(bsize == mi_page_block_size(page)); void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL); - const size_t bsize = page->block_size; // initialize a randomized free list // set up `slice_count` slices to alternate between @@ -491,7 +473,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice size_t counts[MI_MAX_SLICES]; // available objects in the slice for (size_t i = 0; i < slice_count; i++) { - blocks[i] = 
mi_page_block_at(page, page_area, page->capacity + i*slice_extend); + blocks[i] = mi_page_block_at(page, page_area, bsize, page->capacity + i*slice_extend); counts[i] = slice_extend; } counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) @@ -526,7 +508,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co page->free = free_start; } -static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) +static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { UNUSED(stats); #if (MI_SECURE <= 2) @@ -534,12 +516,13 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co mi_assert_internal(page->local_free == NULL); #endif mi_assert_internal(page->capacity + extend <= page->reserved); + mi_assert_internal(bsize == mi_page_block_size(page)); void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); - const size_t bsize = page->block_size; - mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity); + + mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity); // initialize a sequential free list - mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); + mi_block_t* const last = mi_page_block_at(page, page_area, bsize, page->capacity + extend - 1); mi_block_t* block = start; while(block <= last) { mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); @@ -581,8 +564,9 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) mi_stat_counter_increase(tld->stats.pages_extended, 1); // calculate the extend count + const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? 
page->xblock_size : page_size); size_t extend = page->reserved - page->capacity; - size_t max_extend = (page->block_size >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)page->block_size); + size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize); if (max_extend < MI_MIN_EXTEND) max_extend = MI_MIN_EXTEND; if (extend > max_extend) { @@ -596,20 +580,20 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) // commit on-demand for large and huge pages? if (_mi_page_segment(page)->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { - uint8_t* start = page_start + (page->capacity * page->block_size); - _mi_mem_commit(start, extend * page->block_size, NULL, &tld->os); + uint8_t* start = page_start + (page->capacity * bsize); + _mi_mem_commit(start, extend * bsize, NULL, &tld->os); } // and append the extend the free list if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) { - mi_page_free_list_extend(page, extend, &tld->stats ); + mi_page_free_list_extend(page, bsize, extend, &tld->stats ); } else { - mi_page_free_list_extend_secure(heap, page, extend, &tld->stats); + mi_page_free_list_extend_secure(heap, page, bsize, extend, &tld->stats); } // enable the new free list page->capacity += (uint16_t)extend; - mi_stat_increase(tld->stats.page_committed, extend * page->block_size); + mi_stat_increase(tld->stats.page_committed, extend * bsize); // extension into zero initialized memory preserves the zero'd free list if (!page->is_zero_init) { @@ -625,9 +609,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert(segment != NULL); mi_assert_internal(block_size > 0); // set fields + mi_page_set_heap(page, heap); size_t page_size; _mi_segment_page_start(segment, page, block_size, &page_size, NULL); - page->block_size = block_size; + page->xblock_size = (block_size < 
MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST @@ -639,14 +624,14 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); mi_assert_internal(page->used == 0); - mi_assert_internal(page->thread_free == 0); - mi_assert_internal(page->thread_freed == 0); + mi_assert_internal(page->xthread_free == 0); mi_assert_internal(page->next == NULL); mi_assert_internal(page->prev == NULL); mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) - mi_assert_internal(page->key != 0); + mi_assert_internal(page->key[1] != 0); + mi_assert_internal(page->key[2] != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page)); @@ -664,34 +649,19 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) { // search through the pages in "next fit" order - mi_page_t* rpage = NULL; size_t count = 0; - size_t page_free_count = 0; mi_page_t* page = pq->first; - while( page != NULL) + while (page != NULL) { mi_page_t* next = page->next; // remember next count++; // 0. collect freed blocks by us and other threads - _mi_page_free_collect(page,false); + _mi_page_free_collect(page, false); // 1. if the page contains free blocks, we are done if (mi_page_immediate_available(page)) { - // If all blocks are free, we might retire this page instead. - // do this at most 8 times to bound allocation time. 
- // (note: this can happen if a page was earlier not retired due - // to having neighbours that were mostly full or due to concurrent frees) - if (page_free_count < 8 && mi_page_all_free(page)) { - page_free_count++; - if (rpage != NULL) _mi_page_free(rpage,pq,false); - rpage = page; - page = next; - continue; // and keep looking - } - else { - break; // pick this one - } + break; // pick this one } // 2. Try to extend @@ -704,20 +674,12 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 3. If the page is completely full, move it to the `mi_pages_full` // queue so we don't visit long-lived pages too often. mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); - mi_page_to_full(page,pq); + mi_page_to_full(page, pq); page = next; } // for each page - mi_stat_counter_increase(heap->tld->stats.searches,count); - - if (page == NULL) { - page = rpage; - rpage = NULL; - } - if (rpage != NULL) { - _mi_page_free(rpage,pq,false); - } + mi_stat_counter_increase(heap->tld->stats.searches, count); if (page == NULL) { page = mi_page_fresh(heap, pq); @@ -729,11 +691,12 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_assert_internal(page == NULL || mi_page_immediate_available(page)); // finally collect retired pages - _mi_heap_collect_retired(heap,false); + _mi_heap_collect_retired(heap, false); return page; } + // Find a page with free blocks of `size`. 
static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = mi_page_queue(heap,size); @@ -794,14 +757,15 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); if (page != NULL) { + const size_t bsize = mi_page_block_size(page); mi_assert_internal(mi_page_immediate_available(page)); - mi_assert_internal(page->block_size == block_size); + mi_assert_internal(bsize >= size); mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE); mi_assert_internal(_mi_page_segment(page)->used==1); mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + mi_page_set_heap(page, NULL); - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { + if (bsize > MI_HUGE_OBJ_SIZE_MAX) { _mi_stat_increase(&heap->tld->stats.giant, block_size); _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1); } @@ -849,7 +813,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept if (page == NULL) return NULL; // out of memory mi_assert_internal(mi_page_immediate_available(page)); - mi_assert_internal(page->block_size >= size); + mi_assert_internal(mi_page_block_size(page) >= size); // and try again, this time succeeding! (i.e. this should never recurse) return _mi_page_malloc(heap, page, size); diff --git a/src/segment.c b/src/segment.c index 97859fa9..4fb3e28b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -208,8 +208,8 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m mi_assert_internal(size <= psize); size_t reset_size = (size == 0 || size > psize ? 
psize : size); if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { - mi_assert_internal(page->block_size > 0); - reset_size = page->capacity * page->block_size; + mi_assert_internal(page->xblock_size > 0); + reset_size = page->capacity * mi_page_block_size(page); } _mi_mem_reset(start, reset_size, tld->os); } @@ -223,8 +223,8 @@ static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); size_t unreset_size = (size == 0 || size > psize ? psize : size); if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { - mi_assert_internal(page->block_size > 0); - unreset_size = page->capacity * page->block_size; + mi_assert_internal(page->xblock_size > 0); + unreset_size = page->capacity * mi_page_block_size(page); } bool is_zero = false; _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); @@ -255,7 +255,7 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_ } if (page_size != NULL) *page_size = psize; - mi_assert_internal(page->block_size == 0 || _mi_ptr_page(p) == page); + mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } @@ -278,7 +278,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } if (page_size != NULL) *page_size = psize; - mi_assert_internal(page->block_size==0 || _mi_ptr_page(p) == page); + mi_assert_internal(page->xblock_size==0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } @@ -605,7 +605,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); - size_t inuse = page->capacity * page->block_size; + size_t inuse = 
page->capacity * mi_page_block_size(page); _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -707,6 +707,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment)); segment->abandoned++; @@ -765,9 +767,12 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen if (page->segment_in_use) { mi_assert_internal(!page->is_reset); mi_assert_internal(page->is_committed); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + _mi_page_free_collect(page, false); // ensure used count is up to date if (mi_page_all_free(page)) { // if everything free by now, free the page mi_segment_page_clear(segment,page,tld); From ad32eb1dfb2b73ed8eaecfdc14e01cbbf43d05b2 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 17:57:19 -0800 Subject: [PATCH 167/293] eager collect on page reclamation --- src/page.c | 10 +++------- src/segment.c | 7 +++++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/page.c b/src/page.c index 40aec0c6..02f10238 100644 --- a/src/page.c +++ b/src/page.c @@ -229,16 +229,12 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // called from segments when reclaiming abandoned pages void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); - mi_assert_internal(mi_page_heap(page) == NULL); + mi_assert_internal(mi_page_heap(page) == heap); + mi_assert_internal(mi_page_thread_free_flag(page) != 
MI_NEVER_DELAYED_FREE); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(!page->is_reset); - mi_assert_internal(mi_page_thread_free_flag(page) == MI_NEVER_DELAYED_FREE); - mi_page_set_heap(page, heap); mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); - mi_page_queue_push(heap, pq, page); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) - // _mi_page_free_collect(page,false); // no need, as it is just done before reclaim - mi_assert_internal(mi_page_heap(page)!= NULL); + mi_page_queue_push(heap, pq, page); mi_assert_expensive(_mi_page_is_valid(page)); } diff --git a/src/segment.c b/src/segment.c index 4fb3e28b..d27a7c13 100644 --- a/src/segment.c +++ b/src/segment.c @@ -772,13 +772,16 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + // set the heap again and allow delayed free again + mi_page_set_heap(page, heap); + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) _mi_page_free_collect(page, false); // ensure used count is up to date if (mi_page_all_free(page)) { - // if everything free by now, free the page + // if everything free already, clear the page directly mi_segment_page_clear(segment,page,tld); } else { - // otherwise reclaim it + // otherwise reclaim it into the heap _mi_page_reclaim(heap,page); } } From 9629d731888f64db99e43016c916268a73a5f02f Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 18:07:29 -0800 Subject: [PATCH 168/293] fix options --- src/options.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/options.c b/src/options.c index ce21309d..f1d8205f 100644 --- a/src/options.c +++ b/src/options.c @@ -67,6 +67,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use 
only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread + { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds From b8072aaacb581b9655545b9960456c239b7c59af Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 03:54:51 -0800 Subject: [PATCH 169/293] fix debug build --- src/heap.c | 34 +++++++++++++++++----------------- src/segment.c | 7 +++++-- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/heap.c b/src/heap.c index 9f2a4457..12aa0840 100644 --- a/src/heap.c +++ b/src/heap.c @@ -56,7 +56,8 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ mi_assert_expensive(_mi_page_is_valid(page)); return true; } - +#endif +#if MI_DEBUG>=3 static bool mi_heap_is_valid(mi_heap_t* heap) { mi_assert_internal(heap!=NULL); mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL); @@ -111,7 +112,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) { if (!mi_heap_is_initialized(heap)) return; _mi_deferred_free(heap, collect > NORMAL); - + // collect (some) abandoned pages if (collect >= NORMAL && !heap->no_reclaim) { if (collect == NORMAL) { @@ -123,8 +124,8 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) collect == FORCE #else collect >= FORCE - #endif - && _mi_is_main_thread() && mi_heap_is_backing(heap)) + #endif + && _mi_is_main_thread() && mi_heap_is_backing(heap)) { // the main thread is abandoned, try to free all abandoned segments. // if all memory is freed by now, all segments should be freed. 
@@ -135,19 +136,19 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // if abandoning, mark all pages to no longer add to delayed_free if (collect == ABANDON) { //for (mi_page_t* page = heap->pages[MI_BIN_FULL].first; page != NULL; page = page->next) { - // _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE - //} + // _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE + //} mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL); } - // free thread delayed blocks. + // free thread delayed blocks. // (if abandoning, after this there are no more local references into the pages.) _mi_heap_delayed_free(heap); // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); mi_assert_internal( collect != ABANDON || heap->thread_delayed_free == NULL ); - + // collect segment caches if (collect >= FORCE) { _mi_segment_thread_collect(&heap->tld->segments); @@ -177,7 +178,7 @@ void mi_collect(bool force) mi_attr_noexcept { ----------------------------------------------------------- */ mi_heap_t* mi_heap_get_default(void) { - mi_thread_init(); + mi_thread_init(); return mi_get_default_heap(); } @@ -198,7 +199,7 @@ mi_heap_t* mi_heap_new(void) { heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); _mi_random_split(&bheap->random, &heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; + heap->cookie = _mi_heap_random_next(heap) | 1; heap->key[0] = _mi_heap_random_next(heap); heap->key[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe @@ -226,7 +227,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) { static void mi_heap_free(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_initialized(heap)); if (mi_heap_is_backing(heap)) return; // dont free the backing heap - + // reset default if (mi_heap_is_default(heap)) { 
_mi_heap_set_default_direct(heap->tld->heap_backing); @@ -247,7 +248,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ UNUSED(pq); // ensure no more thread_delayed_free will be added - _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); // stats const size_t bsize = mi_page_block_size(page); @@ -311,7 +312,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { if (from==NULL || from->page_count == 0) return; // unfull all full pages in the `from` heap - mi_page_t* page = from->pages[MI_BIN_FULL].first; + mi_page_t* page = from->pages[MI_BIN_FULL].first; while (page != NULL) { mi_page_t* next = page->next; _mi_page_unfull(page); @@ -323,7 +324,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { _mi_heap_delayed_free(from); // transfer all pages by appending the queues; this will set - // a new heap field which is ok as all pages are unfull'd and thus + // a new heap field which is ok as all pages are unfull'd and thus // other threads won't access this field anymore (see `mi_free_block_mt`) for (size_t i = 0; i < MI_BIN_FULL; i++) { mi_page_queue_t* pq = &heap->pages[i]; @@ -334,7 +335,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { } mi_assert_internal(from->thread_delayed_free == NULL); mi_assert_internal(from->page_count == 0); - + // and reset the `from` heap mi_heap_reset_pages(from); } @@ -362,7 +363,7 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { mi_assert(mi_heap_is_initialized(heap)); if (!mi_heap_is_initialized(heap)) return NULL; mi_assert_expensive(mi_heap_is_valid(heap)); - mi_heap_t* old = mi_get_default_heap(); + mi_heap_t* old = mi_get_default_heap(); _mi_heap_set_default_direct(heap); return old; } @@ -534,4 +535,3 @@ bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_vis mi_visit_blocks_args_t args = { visit_blocks, visitor, arg }; return 
mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args); } - diff --git a/src/segment.c b/src/segment.c index ee1de005..3f99c790 100644 --- a/src/segment.c +++ b/src/segment.c @@ -135,7 +135,7 @@ static size_t mi_segment_page_size(const mi_segment_t* segment) { } -#if (MI_DEBUG>=3) +#if (MI_DEBUG>=2) static bool mi_pages_reset_contains(const mi_page_t* page, mi_segments_tld_t* tld) { mi_page_t* p = tld->pages_reset.first; while (p != NULL) { @@ -144,7 +144,9 @@ static bool mi_pages_reset_contains(const mi_page_t* page, mi_segments_tld_t* tl } return false; } +#endif +#if (MI_DEBUG>=3) static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment != NULL); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); @@ -169,6 +171,7 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* #endif static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(page != NULL); if (page->next != NULL || page->prev != NULL) { mi_assert_internal(mi_pages_reset_contains(page, tld)); return false; @@ -1052,6 +1055,6 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= block_size); mi_reset_delayed(tld); - mi_assert_internal(mi_page_not_in_queue(page, tld)); + mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); return page; } From 8d8f355ed0190702edcce7d16d9fdad7466ae2b7 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 11:25:02 -0800 Subject: [PATCH 170/293] add option to reset eagerly when a segment is abandoned --- include/mimalloc.h | 1 + src/options.c | 1 + src/segment.c | 19 ++++++++++++------- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index fe09c7f2..e45b7e4d 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -273,6 +273,7 @@ typedef enum mi_option_e { mi_option_reserve_huge_os_pages, mi_option_segment_cache, mi_option_page_reset, + mi_option_abandoned_page_reset, mi_option_segment_reset, mi_option_eager_commit_delay, mi_option_reset_delay, diff --git a/src/options.c b/src/options.c index f1d8205f..c12c77e0 100644 --- a/src/options.c +++ b/src/options.c @@ -68,6 +68,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free + { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds diff --git a/src/segment.c b/src/segment.c index 3f99c790..ea030d7a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -326,12 +326,15 @@ static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) { page->used = 0; } -static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, mi_segments_tld_t* tld) { - if 
(segment->mem_is_fixed) return; +static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool force_reset, mi_segments_tld_t* tld) { + if (segment->mem_is_fixed) return; // never reset in huge OS pages for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (!page->segment_in_use && !page->is_reset) { mi_pages_reset_remove(page, tld); + if (force_reset) { + mi_page_reset(segment, page, 0, tld); + } } else { mi_assert_internal(mi_page_not_in_queue(page,tld)); @@ -668,9 +671,11 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { - UNUSED(force); - mi_assert(segment != NULL); - mi_pages_reset_remove_all_in_segment(segment, tld); + UNUSED(force); + mi_assert(segment != NULL); + // note: don't reset pages even on abandon as the whole segment is freed? (and ready for reuse) + bool force_reset = (force && mi_option_is_enabled(mi_option_abandoned_page_reset)); + mi_pages_reset_remove_all_in_segment(segment, force_reset, tld); mi_segment_remove_from_free_queue(segment,tld); mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); @@ -840,8 +845,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_expensive(mi_segment_is_valid(segment,tld)); // remove the segment from the free page queue if needed - mi_reset_delayed(tld); - mi_pages_reset_remove_all_in_segment(segment, tld); // do not force reset on free pages in an abandoned segment, as it is already done in segment_thread_collect + mi_reset_delayed(tld); + mi_pages_reset_remove_all_in_segment(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld); mi_segment_remove_from_free_queue(segment, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); From 4e91eab8fca9dfa95f74a7205f8f216dd9f22f02 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 14:12:37 -0800 
Subject: [PATCH 171/293] specialize mi_mallocn for count=1 --- src/alloc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/alloc.c b/src/alloc.c index 621fb0db..be63f86a 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -443,7 +443,12 @@ mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { // Uninitialized `calloc` extern mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(count,size,&total)) return NULL; + if (count==1) { + total = size; + } + else if (mi_mul_overflow(count, size, &total)) { + return NULL; + } return mi_heap_malloc(heap, total); } From 24f8bcbc8f4236b2bd37b1c8bfc169ec9a941942 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 14:25:09 -0800 Subject: [PATCH 172/293] add explicit calling convention to registered functions --- include/mimalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index e45b7e4d..de4282da 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -108,10 +108,10 @@ mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; -typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); +typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept; -typedef void (mi_output_fun)(const char* msg, void* arg); +typedef void (mi_cdecl mi_output_fun)(const char* msg, void* arg); mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept; mi_decl_export void mi_collect(bool force) mi_attr_noexcept; From 1b2b7404f7770022ec806a294fa35e145cb93849 Mon Sep 17 00:00:00 2001 From: 
Kirsten Lee Date: Thu, 16 Jan 2020 14:54:13 -0800 Subject: [PATCH 173/293] flip the order of includes for ease of use --- include/mimalloc-stl-allocator.h | 1 - include/mimalloc.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/mimalloc-stl-allocator.h b/include/mimalloc-stl-allocator.h index 11ba30fb..a98e398b 100644 --- a/include/mimalloc-stl-allocator.h +++ b/include/mimalloc-stl-allocator.h @@ -7,7 +7,6 @@ This header can be used to hook mimalloc into STL containers in place of std::allocator. -----------------------------------------------------------------------------*/ -#include #include // true_type #pragma warning(disable: 4100) diff --git a/include/mimalloc.h b/include/mimalloc.h index 7f26896c..988a080d 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -73,6 +73,8 @@ terms of the MIT license. A copy of the license can be found in the file #include // bool #ifdef __cplusplus +#include + extern "C" { #endif From 526bee6843e2d80a57671f68115c504138791cd0 Mon Sep 17 00:00:00 2001 From: Kirsten Lee Date: Thu, 16 Jan 2020 15:17:15 -0800 Subject: [PATCH 174/293] merge stl and main header --- CMakeLists.txt | 1 - ide/vs2017/mimalloc-override.vcxproj | 1 - ide/vs2017/mimalloc.vcxproj | 1 - ide/vs2019/mimalloc-override.vcxproj | 1 - ide/vs2019/mimalloc.vcxproj | 1 - include/mimalloc-stl-allocator.h | 43 ---------------------------- include/mimalloc.h | 38 +++++++++++++++++++++++- test/test-api.c | 1 - 8 files changed, 37 insertions(+), 50 deletions(-) delete mode 100644 include/mimalloc-stl-allocator.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 93560951..467fad95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,7 +187,6 @@ install(TARGETS mimalloc-static EXPORT mimalloc DESTINATION ${mi_install_dir}) install(FILES include/mimalloc.h DESTINATION ${mi_install_dir}/include) install(FILES include/mimalloc-override.h DESTINATION ${mi_install_dir}/include) install(FILES include/mimalloc-new-delete.h DESTINATION 
${mi_install_dir}/include) -install(FILES include/mimalloc-stl-allocator.h DESTINATION ${mi_install_dir}/include) install(FILES cmake/mimalloc-config.cmake DESTINATION ${mi_install_dir}/cmake) install(FILES cmake/mimalloc-config-version.cmake DESTINATION ${mi_install_dir}/cmake) install(EXPORT mimalloc DESTINATION ${mi_install_dir}/cmake) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index e0a6d85b..863195a3 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -214,7 +214,6 @@ - diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index ff6c8edb..064a13dc 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -239,7 +239,6 @@ - diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index e6416e05..950a0a1a 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -214,7 +214,6 @@ - diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index ffede6ca..17adc958 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -239,7 +239,6 @@ - diff --git a/include/mimalloc-stl-allocator.h b/include/mimalloc-stl-allocator.h deleted file mode 100644 index a98e398b..00000000 --- a/include/mimalloc-stl-allocator.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once -#ifndef MIMALLOC_STL_ALLOCATOR_H -#define MIMALLOC_STL_ALLOCATOR_H - -#ifdef __cplusplus -/* ---------------------------------------------------------------------------- -This header can be used to hook mimalloc into STL containers in place of -std::allocator. 
------------------------------------------------------------------------------*/ -#include // true_type - -#pragma warning(disable: 4100) - -template -struct mi_stl_allocator { - typedef T value_type; - - using propagate_on_container_copy_assignment = std::true_type; - using propagate_on_container_move_assignment = std::true_type; - using propagate_on_container_swap = std::true_type; - using is_always_equal = std::true_type; - - mi_stl_allocator() noexcept {} - mi_stl_allocator(const mi_stl_allocator& other) noexcept {} - template - mi_stl_allocator(const mi_stl_allocator& other) noexcept {} - - T* allocate(size_t n, const void* hint = 0) { - return (T*)mi_mallocn(n, sizeof(T)); - } - - void deallocate(T* p, size_t n) { - mi_free(p); - } -}; - -template -bool operator==(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return true; } -template -bool operator!=(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return false; } - -#endif // __cplusplus -#endif // MIMALLOC_STL_ALLOCATOR_H diff --git a/include/mimalloc.h b/include/mimalloc.h index 988a080d..e664b668 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -73,7 +73,7 @@ terms of the MIT license. A copy of the license can be found in the file #include // bool #ifdef __cplusplus -#include +#include // true_type extern "C" { #endif @@ -328,5 +328,41 @@ mi_decl_export void* mi_new_aligned_nothrow(size_t n, size_t alignment) mi_attr_ } #endif +#ifdef __cplusplus + +// ------------------------------------------------------ +// STL allocator - an extension to hook mimalloc into STL +// containers in place of std::allocator. 
+// ------------------------------------------------------ + +#pragma warning(disable: 4100) +template +struct mi_stl_allocator { + typedef T value_type; + + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::true_type; + using propagate_on_container_swap = std::true_type; + using is_always_equal = std::true_type; + + mi_stl_allocator() noexcept {} + mi_stl_allocator(const mi_stl_allocator& other) noexcept {} + template + mi_stl_allocator(const mi_stl_allocator& other) noexcept {} + + T* allocate(size_t n, const void* hint = 0) { + return (T*)mi_mallocn(n, sizeof(T)); + } + + void deallocate(T* p, size_t n) { + mi_free(p); + } +}; + +template +bool operator==(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return true; } +template +bool operator!=(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return false; } +#endif #endif diff --git a/test/test-api.c b/test/test-api.c index f93884d0..060efc44 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -32,7 +32,6 @@ we therefore test the API over various inputs. 
Please add more tests :-) #include "mimalloc.h" #include "mimalloc-internal.h" -#include "mimalloc-stl-allocator.h" // --------------------------------------------------------------------------- // Test macros: CHECK(name,predicate) and CHECK_BODY(name,body) From 7a9502973d4c20bd5ac962a9b6e5869494990025 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 15:57:11 -0800 Subject: [PATCH 175/293] rearrange STL allocator code: remove pragma, ifdef for C++11 --- ide/vs2019/mimalloc.vcxproj | 2 +- include/mimalloc-internal.h | 5 ++++ include/mimalloc.h | 46 +++++++++++++++---------------------- src/alloc.c | 7 +----- 4 files changed, 25 insertions(+), 35 deletions(-) diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index f59de292..037e380d 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -111,7 +111,7 @@ - Level2 + Level4 Disabled true true diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index a9391a40..500764ed 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -167,6 +167,11 @@ bool _mi_page_is_valid(mi_page_t* page); // Overflow detecting multiply static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { + // quick check for the case where count is one (common for C++ allocators) + if (count==1) { + *total = size; + return false; + } #if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 #include // UINT_MAX, ULONG_MAX #if (SIZE_MAX == UINT_MAX) diff --git a/include/mimalloc.h b/include/mimalloc.h index 29481c80..4c5b0cad 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -73,8 +73,6 @@ terms of the MIT license. 
A copy of the license can be found in the file #include // bool #ifdef __cplusplus -#include // true_type - extern "C" { #endif @@ -337,41 +335,33 @@ mi_decl_export void* mi_new_aligned_nothrow(size_t n, size_t alignment) mi_attr_ } #endif +// --------------------------------------------------------------------------------------------- +// Implement the C++ std::allocator interface for use in STL containers. +// (note: see `mimalloc-new-delete.h` for overriding the new/delete operators globally) +// --------------------------------------------------------------------------------------------- #ifdef __cplusplus -// ------------------------------------------------------ -// STL allocator - an extension to hook mimalloc into STL -// containers in place of std::allocator. -// ------------------------------------------------------ +#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 +#include // true_type +#endif -#pragma warning(disable: 4100) -template -struct mi_stl_allocator { +template struct mi_stl_allocator { typedef T value_type; - +#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 using propagate_on_container_copy_assignment = std::true_type; using propagate_on_container_move_assignment = std::true_type; using propagate_on_container_swap = std::true_type; using is_always_equal = std::true_type; - - mi_stl_allocator() noexcept {} - mi_stl_allocator(const mi_stl_allocator& other) noexcept {} - template - mi_stl_allocator(const mi_stl_allocator& other) noexcept {} - - T* allocate(size_t n, const void* hint = 0) { - return (T*)mi_mallocn(n, sizeof(T)); - } - - void deallocate(T* p, size_t n) { - mi_free(p); - } +#endif + mi_stl_allocator() mi_attr_noexcept {} + mi_stl_allocator(const mi_stl_allocator& other) mi_attr_noexcept { (void)other; } + template mi_stl_allocator(const mi_stl_allocator& other) mi_attr_noexcept { (void)other; } + T* allocate(size_t n, const void* hint = 0) { (void)hint; return (T*)mi_mallocn(n, sizeof(T)); } + void 
deallocate(T* p, size_t n) { mi_free_size(p,n); } }; -template -bool operator==(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return true; } -template -bool operator!=(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return false; } -#endif +template bool operator==(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) mi_attr_noexcept { (void)lhs; (void)rhs; return true; } +template bool operator!=(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) mi_attr_noexcept { (void)lhs; (void)rhs; return false; } +#endif // __cplusplus #endif diff --git a/src/alloc.c b/src/alloc.c index be63f86a..d66c629b 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -443,12 +443,7 @@ mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { // Uninitialized `calloc` extern mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; - if (count==1) { - total = size; - } - else if (mi_mul_overflow(count, size, &total)) { - return NULL; - } + if (mi_mul_overflow(count, size, &total)) return NULL; return mi_heap_malloc(heap, total); } From a0bee081dde0ed7ef102f484c1436f9dd292522b Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 15:58:48 -0800 Subject: [PATCH 176/293] use proper C++11 check for noexcept attribute --- ide/vs2019/mimalloc.vcxproj | 2 +- include/mimalloc.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 037e380d..f59de292 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -111,7 +111,7 @@ - Level4 + Level2 Disabled true true diff --git a/include/mimalloc.h b/include/mimalloc.h index 4c5b0cad..59f394a7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -15,10 +15,10 @@ terms of the MIT license. 
A copy of the license can be found in the file // ------------------------------------------------------ #ifdef __cplusplus - #if (__GNUC__ <= 5) || (_MSC_VER <= 1900) - #define mi_attr_noexcept throw() - #else + #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 #define mi_attr_noexcept noexcept + #else + #define mi_attr_noexcept throw() #endif #else #define mi_attr_noexcept From 3447debf26e41e25e2f18908ebeeb3b99ca93fa0 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 16:08:23 -0800 Subject: [PATCH 177/293] add Linux gcc C++ build to azure pipeline --- azure-pipelines.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index f88b2e1a..844a4d08 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -61,6 +61,11 @@ jobs: CXX: g++ BuildType: secure cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON + Debug++: + CC: gcc + CXX: g++ + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON Debug Clang: CC: clang CXX: clang++ From 7a98a461a333b195b8ab090484a4e4be4fcd05fb Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 16:21:59 -0800 Subject: [PATCH 178/293] fix type of bitmap field to avoid C++ error on older gcc --- src/bitmap.inc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 11ada472..c3813a44 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -113,7 +113,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_f mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); mi_assert_internal(bitidx + count <= MI_BITMAP_FIELD_BITS); - mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]); + uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]); if ((field & mask) == 0) { // free? if (mi_atomic_cas_strong(&bitmap[idx], (field|mask), field)) { // claimed! 
@@ -221,7 +221,7 @@ static inline bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_field const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]); + uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]); if (any_ones != NULL) *any_ones = ((field & mask) != 0); return ((field & mask) == mask); } From da709bcf110c74356ba5f0fc3267e7ccb47bf91c Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 16:30:44 -0800 Subject: [PATCH 179/293] test for gcc version for aligned new/delete overrides --- src/alloc-override.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alloc-override.c b/src/alloc-override.c index 002374bb..89c5126a 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -98,7 +98,7 @@ terms of the MIT license. A copy of the license can be found in the file void operator delete[](void* p, std::size_t n) MI_FORWARD02(mi_free_size,p,n); #endif - #if (__cplusplus > 201402L || defined(__cpp_aligned_new)) + #if (__cplusplus > 201402L || defined(__cpp_aligned_new)) && (!defined(__GNUC__) || (__GNUC__ > 5)) void operator delete (void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast(al)); } void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast(al)); } void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast(al)); }; From 3f8b8b7a55b2113f75c9eb8173841f1d9c0de676 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 16:45:34 -0800 Subject: [PATCH 180/293] initialize thread_delayed_free field atomically --- src/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/init.c b/src/init.c index d99eeb40..b8422c2f 100644 --- a/src/init.c +++ b/src/init.c @@ -122,7 +122,7 @@ mi_heap_t _mi_heap_main = 
{ &tld_main, MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, - NULL, + ATOMIC_VAR_INIT(NULL), 0, // thread id MI_INIT_COOKIE, // initial cookie { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) From 5c82b459d6401dec4b92b4ae81248f35ca3fa1fb Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 16:49:59 -0800 Subject: [PATCH 181/293] enable debug clang++ compilation in azure pipelines --- azure-pipelines.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 844a4d08..8ff0b629 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -81,6 +81,11 @@ jobs: CXX: clang++ BuildType: secure-clang cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON + Debug++ Clang: + CC: clang + CXX: clang++ + BuildType: debug-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON steps: - task: CMake@1 inputs: From f750e793ff60c6353d50ae106579d9317a17a54f Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 16:52:44 -0800 Subject: [PATCH 182/293] ensure unique build names for C++ azure pipeline jobs --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 8ff0b629..6c7bad96 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -64,7 +64,7 @@ jobs: Debug++: CC: gcc CXX: g++ - BuildType: debug + BuildType: debug-cxx cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON Debug Clang: CC: clang @@ -84,7 +84,7 @@ jobs: Debug++ Clang: CC: clang CXX: clang++ - BuildType: debug-clang + BuildType: debug-clang-cxx cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON steps: - task: CMake@1 From c6037ac8f2c92cc07248051e58e88bf3aac7cf05 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 16:56:02 -0800 Subject: [PATCH 183/293] fix assertion index --- src/page.c | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 02f10238..84baf306 100644 --- a/src/page.c +++ b/src/page.c @@ -626,8 +626,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) + mi_assert_internal(page->key[0] != 0); mi_assert_internal(page->key[1] != 0); - mi_assert_internal(page->key[2] != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page)); From b84b11b6a90ab26d581c4a569748cef80be38276 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 18:11:29 -0800 Subject: [PATCH 184/293] update readme --- readme.md | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/readme.md b/readme.md index 2cbd1baf..b6258cfc 100644 --- a/readme.md +++ b/readme.md @@ -56,8 +56,10 @@ Enjoy! ### Releases -* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and stronger -free list encoding in secure mode. +* 2020-01-XX, `v1.4.0`: stable release 1.4: delayed OS page reset for (much) better performance + with page reset enabled, more eager concurrent free, addition of STL allocator. +* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger +free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode. * 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. * 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. * 2019-10-07, `v1.1.0`: stable release 1.1. @@ -130,7 +132,7 @@ mimalloc uses only safe OS calls (`mmap` and `VirtualAlloc`) and can co-exist with other allocators linked to the same program. 
If you use `cmake`, you can simply use: ``` -find_package(mimalloc 1.0 REQUIRED) +find_package(mimalloc 1.4 REQUIRED) ``` in your `CMakeLists.txt` to find a locally installed mimalloc. Then use either: ``` @@ -144,7 +146,9 @@ to link with the static library. See `test\CMakeLists.txt` for an example. For best performance in C++ programs, it is also recommended to override the global `new` and `delete` operators. For convience, mimalloc provides -[mimalloc-new-delete.h](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project. +[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project. +In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator` +interface. You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`) and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version): @@ -195,11 +199,15 @@ or via environment variables. - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. - `MIMALLOC_VERBOSE=1`: show verbose messages. - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages. +- `MIMALLOC_PAGE_RESET=1`: reset (or purge) OS pages when not in use. This can reduce + memory fragmentation in long running (server) programs. If performance is impacted, + `MIMALLOC_RESET_DELAY=` can be set higher (100ms by default) to make the page + reset occur less frequently. - `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages when available; for some workloads this can significantly improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). 
However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that - can have fragmented memory. + can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead when possible). - `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions show in the working set even though usually just a small part is committed to physical memory. This is why it turned off by default on Windows as it looks not good in the task manager. However, in reality it is always better @@ -207,11 +215,16 @@ or via environment variables. - `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at startup and can give quite a performance improvement on long running workloads. Usually it is better to not use `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving - contiguous physical memory can take a long time when memory is fragmented. Still experimental. + contiguous physical memory can take a long time when memory is fragmented. + Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). With huge OS pages, it may be beneficial to set the setting + `MIMALLOC_EAGER_COMMIT_DELAY=N` (with usually `N` as 1) to delay the initial `N` segments + of a thread to not allocate in the huge OS pages; this prevents threads that are short lived + and allocate just a little from taking up space in the huge OS page area (which cannot be reset).
[linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5 [windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017 + # Overriding Malloc Overriding the standard `malloc` can be done either _dynamically_ or _statically_. @@ -251,13 +264,13 @@ resolved to the _mimalloc_ library. Note that certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). -Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is actively worked on to fix this -(see issue [`#50`](https://github.com/microsoft/mimalloc/issues/50)). +Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is +actively worked on to fix this (see issue [`#50`](https://github.com/microsoft/mimalloc/issues/50)). ### Windows -On Windows you need to link your program explicitly with the mimalloc -DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). +Overriding on Windows is robust but requires that you link your program explicitly with +the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). 
The redirection DLL ensures that all calls to the C runtime malloc API get redirected to @@ -267,8 +280,8 @@ To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the `main` function, like `mi_version()` (or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project for an example on how to use this. For best performance on Windows with C++, it -is highly recommended to also override the `new`/`delete` operations (as described -in the introduction). +is highly recommended to also override the `new`/`delete` operations (by including +[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) in a single(!) source file in your project). The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected. From 6b7356a10aad3ff839689fbc2e50e11512d910b5 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 18:12:01 -0800 Subject: [PATCH 185/293] make mimalloc compile with the highest warning level on msvc --- ide/vs2019/mimalloc.vcxproj | 2 +- include/mimalloc-internal.h | 1 + src/memory.c | 22 +++++++++++----------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index f59de292..037e380d 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -111,7 +111,7 @@ - Level2 + Level4 Disabled true true diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 500764ed..f039fc50 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -21,6 +21,7 @@ terms of the MIT license.
A copy of the license can be found in the file #endif #if defined(_MSC_VER) +#pragma warning(disable:4127) // constant conditional due to MI_SECURE paths #define mi_decl_noinline __declspec(noinline) #define mi_attr_noreturn #elif defined(__GNUC__) || defined(__clang__) diff --git a/src/memory.c b/src/memory.c index ee84f755..9603a26f 100644 --- a/src/memory.c +++ b/src/memory.c @@ -80,7 +80,7 @@ typedef union mi_region_info_u { bool valid; bool is_large; short numa_node; - }; + } x; } mi_region_info_t; @@ -204,9 +204,9 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, // and share it mi_region_info_t info; - info.valid = true; - info.is_large = region_large; - info.numa_node = _mi_os_numa_node(tld); + info.x.valid = true; + info.x.is_large = region_large; + info.x.numa_node = (short)_mi_os_numa_node(tld); mi_atomic_write(&r->info, info.value); // now make it available to others *region = r; return true; @@ -224,12 +224,12 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo // numa correct if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = info.numa_node; + int rnode = info.x.numa_node; if (rnode >= 0 && rnode != numa_node) return false; } // check allow-large - if (!allow_large && info.is_large) return false; + if (!allow_large && info.x.is_large) return false; return true; } @@ -278,11 +278,11 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_region_info_t info; info.value = mi_atomic_read(&region->info); void* start = mi_atomic_read_ptr(&region->start); - mi_assert_internal(!(info.is_large && !*is_large)); + mi_assert_internal(!(info.x.is_large && !*is_large)); mi_assert_internal(start != NULL); *is_zero = mi_bitmap_unclaim(&region->dirty, 1, blocks, bit_idx); - *is_large = info.is_large; + *is_large = info.x.is_large; *memid = mi_memid_create(region, bit_idx); void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) *
MI_SEGMENT_SIZE); @@ -292,7 +292,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo bool any_uncommitted; mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, &any_uncommitted); if (any_uncommitted) { - mi_assert_internal(!info.is_large); + mi_assert_internal(!info.x.is_large); bool commit_zero; _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); if (commit_zero) *is_zero = true; @@ -307,7 +307,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // unreset reset blocks if (mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) { // some blocks are still reset - mi_assert_internal(!info.is_large); + mi_assert_internal(!info.x.is_large); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx); if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed @@ -412,7 +412,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re } // reset the blocks to reduce the working set.
- if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset) + if (!info.x.is_large && mi_option_is_enabled(mi_option_segment_reset) && (mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead { From dc94d25890e965fb317ee15f4bca6e7b30b8898f Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 19:39:14 -0800 Subject: [PATCH 186/293] update documentation --- doc/mimalloc-doc.h | 128 +++++-- docs/annotated.html | 2 +- docs/bench.html | 2 +- docs/build.html | 4 +- docs/classes.html | 2 +- docs/environment.html | 127 +++++++ docs/functions.html | 2 +- docs/functions_vars.html | 2 +- docs/group__aligned.html | 2 +- docs/group__analysis.html | 2 +- docs/group__extended.html | 349 ++++++++++++------ docs/group__extended.js | 16 +- docs/group__heap.html | 2 +- docs/group__malloc.html | 2 +- docs/group__options.html | 21 +- docs/group__options.js | 5 +- docs/group__posix.html | 2 +- docs/group__typed.html | 2 +- docs/group__zeroinit.html | 597 +++++++++++++++++++++++++++++++ docs/group__zeroinit.js | 14 + docs/index.html | 4 +- docs/mimalloc-doc_8h_source.html | 68 ++-- docs/modules.html | 2 +- docs/navtreeindex0.js | 41 ++- docs/overrides.html | 10 +- docs/pages.html | 2 +- docs/search/all_6.js | 18 +- docs/search/all_c.html | 30 ++ docs/search/all_c.js | 4 + docs/search/all_d.html | 30 ++ docs/search/all_d.js | 4 + docs/search/enumvalues_1.js | 3 +- docs/search/functions_0.js | 11 +- docs/search/functions_1.html | 30 ++ docs/search/functions_1.js | 4 + docs/search/groups_7.html | 30 ++ docs/search/groups_7.js | 4 + docs/search/pages_4.html | 30 ++ docs/search/pages_4.js | 4 + docs/search/typedefs_0.js | 4 +- docs/search/typedefs_1.html | 30 ++ docs/search/typedefs_1.js | 4 + docs/search/typedefs_2.html | 30 ++ docs/search/typedefs_2.js | 5 + docs/using.html | 8 +- 45 files changed, 1437 insertions(+), 256 deletions(-) create mode 100644 
docs/environment.html create mode 100644 docs/group__zeroinit.html create mode 100644 docs/group__zeroinit.js create mode 100644 docs/search/all_c.html create mode 100644 docs/search/all_c.js create mode 100644 docs/search/all_d.html create mode 100644 docs/search/all_d.js create mode 100644 docs/search/functions_1.html create mode 100644 docs/search/functions_1.js create mode 100644 docs/search/groups_7.html create mode 100644 docs/search/groups_7.js create mode 100644 docs/search/pages_4.html create mode 100644 docs/search/pages_4.js create mode 100644 docs/search/typedefs_1.html create mode 100644 docs/search/typedefs_1.js create mode 100644 docs/search/typedefs_2.html create mode 100644 docs/search/typedefs_2.js diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index 5ad5a1e6..71cc1589 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -26,7 +26,7 @@ without code changes, for example, on Unix you can use it as: Notable aspects of the design include: -- __small and consistent__: the library is less than 3500 LOC using simple and +- __small and consistent__: the library is less than 6k LOC using simple and consistent data structures. This makes it very suitable to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic _heartbeat_ and deferred freeing (for @@ -297,10 +297,17 @@ size_t mi_good_size(size_t size); void mi_collect(bool force); /// Print the main statistics. -/// @param out Output function. Use \a NULL for outputting to \a stderr. +/// @param out Ignored, outputs to the registered output function or stderr by default. /// /// Most detailed when using a debug build. -void mi_stats_print(mi_output_fun* out); +void mi_stats_print(void* out); + +/// Print the main statistics. +/// @param out An output function or \a NULL for the default. +/// @param arg Optional argument passed to \a out (if not \a NULL) +/// +/// Most detailed when using a debug build. 
+void mi_stats_print(mi_output_fun* out, void* arg); /// Reset statistics. void mi_stats_reset(void); @@ -320,20 +327,23 @@ void mi_thread_init(void); void mi_thread_done(void); /// Print out heap statistics for this thread. -/// @param out Output function. Use \a NULL for outputting to \a stderr. +/// @param out An output function or \a NULL for the default. +/// @param arg Optional argument passed to \a out (if not \a NULL) /// /// Most detailed when using a debug build. -void mi_thread_stats_print(mi_output_fun* out); +void mi_thread_stats_print_out(mi_output_fun* out, void* arg); /// Type of deferred free functions. /// @param force If \a true all outstanding items should be freed. /// @param heartbeat A monotonically increasing count. +/// @param arg Argument that was passed at registration to hold extra state. /// /// @see mi_register_deferred_free -typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); +typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); /// Register a deferred free function. /// @param deferred_free Address of a deferred free-ing function or \a NULL to unregister. +/// @param arg Argument that will be passed on to the deferred free function. /// /// Some runtime systems use deferred free-ing, for example when using /// reference counting to limit the worst case free time. @@ -346,20 +356,22 @@ typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); /// to be called deterministically after some number of allocations /// (regardless of freeing or available free memory). /// At most one \a deferred_free function can be active. -void mi_register_deferred_free(mi_deferred_free_fun* deferred_free); +void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg); /// Type of output functions. /// @param msg Message to output. +/// @param arg Argument that was passed at registration to hold extra state. 
/// /// @see mi_register_output() -typedef void (mi_output_fun)(const char* msg); +typedef void (mi_output_fun)(const char* msg, void* arg); /// Register an output function. -/// @param out The output function, use `NULL` to output to stdout. +/// @param out The output function, use `NULL` to output to stderr. +/// @param arg Argument that will be passed on to the output function. /// /// The `out` function is called to output any information from mimalloc, /// like verbose or warning messages. -void mi_register_output(mi_output_fun* out) mi_attr_noexcept; +void mi_register_output(mi_output_fun* out, void* arg); /// Is a pointer part of our heap? /// @param p The pointer to check. @@ -367,18 +379,35 @@ void mi_register_output(mi_output_fun* out) mi_attr_noexcept; /// This function is relatively fast. bool mi_is_in_heap_region(const void* p); -/// Reserve \a pages of huge OS pages (1GiB) but stops after at most `max_secs` seconds. + +/// Reserve \a pages of huge OS pages (1GiB) evenly divided over \a numa_nodes nodes, +/// but stops after at most `timeout_msecs` seconds. /// @param pages The number of 1GiB pages to reserve. -/// @param max_secs Maximum number of seconds to try reserving. -/// @param pages_reserved If not \a NULL, it is set to the actual number of pages that were reserved. +/// @param numa_nodes The number of nodes do evenly divide the pages over, or 0 for using the actual number of NUMA nodes. +/// @param timeout_msecs Maximum number of milli-seconds to try reserving, or 0 for no timeout. /// @returns 0 if successfull, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out. /// /// The reserved memory is used by mimalloc to satisfy allocations. -/// May quit before \a max_secs are expired if it estimates it will take more than -/// 1.5 times \a max_secs. The time limit is needed because on some operating systems +/// May quit before \a timeout_msecs are expired if it estimates it will take more than +/// 1.5 times \a timeout_msecs. 
The time limit is needed because on some operating systems /// it can take a long time to reserve contiguous memory if the physical memory is /// fragmented. -int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved); +int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs); + +/// Reserve \a pages of huge OS pages (1GiB) at a specific \a numa_node, +/// but stops after at most `timeout_msecs` seconds. +/// @param pages The number of 1GiB pages to reserve. +/// @param numa_node The NUMA node where the memory is reserved (start at 0). +/// @param timeout_msecs Maximum number of milli-seconds to try reserving, or 0 for no timeout. +/// @returns 0 if successfull, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out. +/// +/// The reserved memory is used by mimalloc to satisfy allocations. +/// May quit before \a timeout_msecs are expired if it estimates it will take more than +/// 1.5 times \a timeout_msecs. The time limit is needed because on some operating systems +/// it can take a long time to reserve contiguous memory if the physical memory is +/// fragmented. +int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs); + /// Is the C runtime \a malloc API redirected? /// @returns \a true if all malloc API calls are redirected to mimalloc. @@ -702,13 +731,14 @@ typedef enum mi_option_e { mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows) mi_option_large_os_pages, ///< Use large OS pages (2MiB in size) if possible mi_option_reserve_huge_os_pages, ///< The number of huge OS pages (1GiB in size) to reserve at the start of the program. - mi_option_segment_cache, ///< The number of segments per thread to keep cached. - mi_option_page_reset, ///< Reset page memory when it becomes free. - mi_option_cache_reset, ///< Reset segment memory when a segment is cached. 
+ mi_option_segment_cache, ///< The number of segments per thread to keep cached. + mi_option_page_reset, ///< Reset page memory after \a mi_option_reset_delay milliseconds when it becomes free. + mi_option_segment_reset, ///< Experimental + mi_option_reset_delay, ///< Delay in milli-seconds before resetting a page (100ms by default) + mi_option_use_numa_nodes, ///< Pretend there are at most N NUMA nodes mi_option_reset_decommits, ///< Experimental mi_option_eager_commit_delay, ///< Experimental - mi_option_segment_reset, ///< Experimental - mi_option_os_tag, ///< OS tag to assign to mimalloc'd memory + mi_option_os_tag, ///< OS tag to assign to mimalloc'd memory _mi_option_last } mi_option_t; @@ -774,7 +804,7 @@ git clone https://github.com/microsoft/mimalloc ## Windows -Open `ide/vs2017/mimalloc.sln` in Visual Studio 2017 and build. +Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build (or `ide/vs2017/mimalloc.sln`). The `mimalloc` project builds a static library (in `out/msvc-x64`), while the `mimalloc-override` project builds a DLL for overriding malloc in the entire program. @@ -826,6 +856,7 @@ Notes: /*! \page using Using the library +### Build The preferred usage is including ``, linking with the shared- or static library, and using the `mi_malloc` API exclusively for allocation. For example, @@ -849,6 +880,19 @@ target_link_libraries(myapp PUBLIC mimalloc-static) ``` to link with the static library. See `test\CMakeLists.txt` for an example. +### C++ +For best performance in C++ programs, it is also recommended to override the +global `new` and `delete` operators. For convience, mimalloc provides +[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project. + +In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator` +interface. 
For example: +``` +std::vector> vec; +vec.push_back(some_struct()); +``` + +### Statistics You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`) and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version): @@ -897,20 +941,33 @@ See \ref overrides for more info. /*! \page environment Environment Options -You can set further options either programmatically -(using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), +You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), or via environment variables. - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. - `MIMALLOC_VERBOSE=1`: show verbose messages. - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages. +- `MIMALLOC_PAGE_RESET=1`: reset (or purge) OS pages when not in use. This can reduce + memory fragmentation in long running (server) programs. If performance is impacted, + `MIMALLOC_RESET_DELAY=`_msecs_ can be set higher (100ms by default) to make the page + reset occur less frequently. - `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages when available; for some workloads this can significantly improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs - to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). + to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes + the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that + can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead when possible). - `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions show in the working set even though usually just a small part is committed to physical memory. 
This is why it turned off by default on Windows as it looks not good in the task manager. However, in reality it is always better to turn it on as it improves performance and has no other drawbacks. +- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at + startup and can give quite a performance improvement on long running workloads. Usually it is better to not use + `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving + contiguous physical memory can take a long time when memory is fragmented. + Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). With huge OS pages, it may be beneficial to set the setting + `MIMALLOC_EAGER_COMMIT_DELAY=N` (with usually `N` as 1) to delay the initial `N` segments + of a thread to not allocate in the huge OS pages; this prevents threads that are short lived + and allocate just a little to take up space in the huge OS page area (which cannot be reset). [linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5 [windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017 @@ -960,25 +1017,28 @@ Note: unfortunately, at this time, dynamic overriding on macOS seems broken but ### Windows -On Windows you need to link your program explicitly with the mimalloc -DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). 
+Overriding on Windows is robust but requires that you link your program explicitly with +the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available -in the same folder as the mimalloc DLL at runtime (as it as referred to by the mimalloc DLL). -The redirection DLL's ensure all calls to the C runtime malloc API get redirected to mimalloc. +in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). +The redirection DLL ensures that all calls to the C runtime malloc API get redirected to +mimalloc (in `mimalloc-override.dll`). To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the `main` function, like `mi_version()` (or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project -for an example on how to use this. +for an example on how to use this. For best performance on Windows with C++, it +is highly recommended to also override the `new`/`delete` operations (by including +[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) a single(!) source file in your project). The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic -overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc successfully redirected. +overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected. 
-(Note: in principle, it should be possible to patch existing executables -that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the mimalloc DLL into -the import table (and putting `mimalloc-redirect.dll` in the same folder) +(Note: in principle, it is possible to patch existing executables +that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the `mimalloc-override.dll` into the import table (and putting `mimalloc-redirect.dll` in the same folder) Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)). + ## Static override On Unix systems, you can also statically link with _mimalloc_ to override the standard diff --git a/docs/annotated.html b/docs/annotated.html index dcc2e74d..4d2a8bcc 100644 --- a/docs/annotated.html +++ b/docs/annotated.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/bench.html b/docs/bench.html index 11b18550..6b289c04 100644 --- a/docs/bench.html +++ b/docs/bench.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/build.html b/docs/build.html index 3e870697..755aad88 100644 --- a/docs/build.html +++ b/docs/build.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -103,7 +103,7 @@ $(document).ready(function(){initNavTree('build.html','');});

Checkout the sources from Github:

git clone https://github.com/microsoft/mimalloc

Windows

-

Open ide/vs2017/mimalloc.sln in Visual Studio 2017 and build. The mimalloc project builds a static library (in out/msvc-x64), while the mimalloc-override project builds a DLL for overriding malloc in the entire program.

+

Open ide/vs2019/mimalloc.sln in Visual Studio 2019 and build (or ide/vs2017/mimalloc.sln). The mimalloc project builds a static library (in out/msvc-x64), while the mimalloc-override project builds a DLL for overriding malloc in the entire program.

macOS, Linux, BSD, etc.

We use cmake1 as the build system:

> mkdir -p out/release
> cd out/release
> cmake ../..
> make

This builds the library as a shared (dynamic) library (.so or .dylib), a static library (.a), and as a single object file (.o).

diff --git a/docs/classes.html b/docs/classes.html index 760b28de..e5ea3ea8 100644 --- a/docs/classes.html +++ b/docs/classes.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/environment.html b/docs/environment.html new file mode 100644 index 00000000..1063654e --- /dev/null +++ b/docs/environment.html @@ -0,0 +1,127 @@ + + + + + + + +mi-malloc: Environment Options + + + + + + + + + + + + + + + + +
+
+ + + + + + + + +
+
mi-malloc +  1.4 +
+
+ + + + + + +
+
+
+ + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
Environment Options
+
+
+

You can set further options either programmatically (using mi_option_set), or via environment variables.

+
    +
  • MIMALLOC_SHOW_STATS=1: show statistics when the program terminates.
  • +
  • MIMALLOC_VERBOSE=1: show verbose messages.
  • +
  • MIMALLOC_SHOW_ERRORS=1: show error and warning messages.
  • +
  • MIMALLOC_PAGE_RESET=1: reset (or purge) OS pages when not in use. This can reduce memory fragmentation in long running (server) programs. If performance is impacted, MIMALLOC_RESET_DELAY=_msecs_ can be set higher (100ms by default) to make the page reset occur less frequently.
  • +
  • MIMALLOC_LARGE_OS_PAGES=1: use large OS pages when available; for some workloads this can significantly improve performance. Use MIMALLOC_VERBOSE to check if the large OS pages are enabled – usually one needs to explicitly allow large OS pages (as on Windows and Linux). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that can have fragmented memory (for that reason, we generally recommend to use MIMALLOC_RESERVE_HUGE_OS_PAGES instead when possible).
  • +
  • MIMALLOC_EAGER_REGION_COMMIT=1: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions show in the working set even though usually just a small part is committed to physical memory. This is why it is turned off by default on Windows, as it does not look good in the task manager. However, in reality it is always better to turn it on as it improves performance and has no other drawbacks.
  • +
  • MIMALLOC_RESERVE_HUGE_OS_PAGES=N: where N is the number of 1GiB huge OS pages. This reserves the huge pages at startup and can give quite a performance improvement on long running workloads. Usually it is better to not use MIMALLOC_LARGE_OS_PAGES in combination with this setting. Just like large OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented. Note that we usually need to explicitly enable huge OS pages (as on Windows and Linux). With huge OS pages, it may be beneficial to set the setting MIMALLOC_EAGER_COMMIT_DELAY=N (with usually N as 1) to delay the initial N segments of a thread to not allocate in the huge OS pages; this prevents threads that are short lived and allocate just a little from taking up space in the huge OS page area (which cannot be reset).
  • +
+
+
+
+ + + + diff --git a/docs/functions.html b/docs/functions.html index d2615a17..43e116ed 100644 --- a/docs/functions.html +++ b/docs/functions.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/functions_vars.html b/docs/functions_vars.html index b824832f..060a18d2 100644 --- a/docs/functions_vars.html +++ b/docs/functions_vars.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/group__aligned.html b/docs/group__aligned.html index 4980b45a..88c10eb4 100644 --- a/docs/group__aligned.html +++ b/docs/group__aligned.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/group__analysis.html b/docs/group__analysis.html index 3301fdef..b8d644aa 100644 --- a/docs/group__analysis.html +++ b/docs/group__analysis.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/group__extended.html b/docs/group__extended.html index 4d07f38d..85ea3624 100644 --- a/docs/group__extended.html +++ b/docs/group__extended.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -118,12 +118,12 @@ Macros - - - - - - + + + + + +

Typedefs

typedef void() mi_deferred_free_fun(bool force, unsigned long long heartbeat)
 Type of deferred free functions. More...
 
typedef void() mi_output_fun(const char *msg)
 Type of output functions. More...
 
typedef void() mi_deferred_free_fun(bool force, unsigned long long heartbeat, void *arg)
 Type of deferred free functions. More...
 
typedef void() mi_output_fun(const char *msg, void *arg)
 Type of output functions. More...
 
@@ -142,9 +142,12 @@ Functions - - - + + + + + + @@ -157,21 +160,24 @@ Functions - - - - - - - - - + + + + + + + + + - - - + + + + + + @@ -196,14 +202,14 @@ Functions

Typedef Documentation

- -

◆ mi_deferred_free_fun

+ +

◆ mi_deferred_free_fun

Functions

void mi_collect (bool force)
 Eagerly free memory. More...
 
void mi_stats_print (mi_output_fun *out)
 Print the main statistics. More...
 
void mi_stats_print (void *out)
 Print the main statistics. More...
 
void mi_stats_print (mi_output_fun *out, void *arg)
 Print the main statistics. More...
 
void mi_stats_reset (void)
 Reset statistics. More...
 
void mi_thread_done (void)
 Uninitialize mimalloc on a thread. More...
 
void mi_thread_stats_print (mi_output_fun *out)
 Print out heap statistics for this thread. More...
 
void mi_register_deferred_free (mi_deferred_free_fun *deferred_free)
 Register a deferred free function. More...
 
void mi_register_output (mi_output_fun *out) mi_attr_noexcept
 Register an output function. More...
 
void mi_thread_stats_print_out (mi_output_fun *out, void *arg)
 Print out heap statistics for this thread. More...
 
void mi_register_deferred_free (mi_deferred_free_fun *deferred_free, void *arg)
 Register a deferred free function. More...
 
void mi_register_output (mi_output_fun *out, void *arg)
 Register an output function. More...
 
bool mi_is_in_heap_region (const void *p)
 Is a pointer part of our heap? More...
 
int mi_reserve_huge_os_pages (size_t pages, double max_secs, size_t *pages_reserved)
 Reserve pages of huge OS pages (1GiB) but stops after at most max_secs seconds. More...
 
int mi_reserve_huge_os_pages_interleave (size_t pages, size_t numa_nodes, size_t timeout_msecs)
 Reserve pages of huge OS pages (1GiB) evenly divided over numa_nodes nodes, but stops after at most timeout_msecs seconds. More...
 
int mi_reserve_huge_os_pages_at (size_t pages, int numa_node, size_t timeout_msecs)
 Reserve pages of huge OS pages (1GiB) at a specific numa_node, but stops after at most timeout_msecs seconds. More...
 
bool mi_is_redirected ()
 Is the C runtime malloc API redirected? More...
 
- +
typedef void() mi_deferred_free_fun(bool force, unsigned long long heartbeat)typedef void() mi_deferred_free_fun(bool force, unsigned long long heartbeat, void *arg)
@@ -212,22 +218,23 @@ Functions
Parameters
- + +
forceIf true all outstanding items should be freed.
heartbeatA monotonically increasing count.
heartbeatA monotonically increasing count.
argArgument that was passed at registration to hold extra state.
-
See also
mi_register_deferred_free
+
See also
mi_register_deferred_free
- -

◆ mi_output_fun

+ +

◆ mi_output_fun

- +
typedef void() mi_output_fun(const char *msg)typedef void() mi_output_fun(const char *msg, void *arg)
@@ -235,11 +242,12 @@ Functions

Type of output functions.

Parameters
- + +
msgMessage to output.
msgMessage to output.
argArgument that was passed at registration to hold extra state.
-
See also
mi_register_output()
+
See also
mi_register_output()
@@ -375,8 +383,8 @@ Functions
- -

◆ mi_register_deferred_free()

+ +

◆ mi_register_deferred_free()

@@ -384,74 +392,14 @@ Functions void mi_register_deferred_free ( - mi_deferred_free_fun *  - deferred_free) - - - -
- -

Register a deferred free function.

-
Parameters
- - -
deferred_freeAddress of a deferred free-ing function or NULL to unregister.
-
-
-

Some runtime systems use deferred free-ing, for example when using reference counting to limit the worst case free time. Such systems can register (re-entrant) deferred free function to free more memory on demand. When the force parameter is true all possible memory should be freed. The per-thread heartbeat parameter is monotonically increasing and guaranteed to be deterministic if the program allocates deterministically. The deferred_free function is guaranteed to be called deterministically after some number of allocations (regardless of freeing or available free memory). At most one deferred_free function can be active.

- -
-
- -

◆ mi_register_output()

- -
-
- - - - - - - - -
void mi_register_output (mi_output_funout)
-
- -

Register an output function.

-
Parameters
- - -
outThe output function, use NULL to output to stdout.
-
-
-

The out function is called to output any information from mimalloc, like verbose or warning messages.

- -
-
- -

◆ mi_reserve_huge_os_pages()

- -
-
- - - - - - + + - - - - - - - - + + @@ -461,17 +409,145 @@ Functions
int mi_reserve_huge_os_pages (size_t pages, mi_deferred_free_fundeferred_free,
double max_secs,
size_t * pages_reserved void * arg 
-

Reserve pages of huge OS pages (1GiB) but stops after at most max_secs seconds.

+

Register a deferred free function.

+
Parameters
+ + + +
deferred_freeAddress of a deferred free-ing function or NULL to unregister.
argArgument that will be passed on to the deferred free function.
+
+
+

Some runtime systems use deferred free-ing, for example when using reference counting to limit the worst case free time. Such systems can register (re-entrant) deferred free function to free more memory on demand. When the force parameter is true all possible memory should be freed. The per-thread heartbeat parameter is monotonically increasing and guaranteed to be deterministic if the program allocates deterministically. The deferred_free function is guaranteed to be called deterministically after some number of allocations (regardless of freeing or available free memory). At most one deferred_free function can be active.

+ +
+
+ +

◆ mi_register_output()

+ +
+
+ + + + + + + + + + + + + + + + + + +
void mi_register_output (mi_output_funout,
void * arg 
)
+
+ +

Register an output function.

+
Parameters
+ + + +
outThe output function, use NULL to output to stderr.
argArgument that will be passed on to the output function.
+
+
+

The out function is called to output any information from mimalloc, like verbose or warning messages.

+ +
+
+ +

◆ mi_reserve_huge_os_pages_at()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
int mi_reserve_huge_os_pages_at (size_t pages,
int numa_node,
size_t timeout_msecs 
)
+
+ +

Reserve pages of huge OS pages (1GiB) at a specific numa_node, but stops after at most timeout_msecs milliseconds.

Parameters
- - + +
pagesThe number of 1GiB pages to reserve.
max_secsMaximum number of seconds to try reserving.
pages_reservedIf not NULL, it is set to the actual number of pages that were reserved.
numa_nodeThe NUMA node where the memory is reserved (start at 0).
timeout_msecsMaximum number of milli-seconds to try reserving, or 0 for no timeout.
Returns
0 if successful, ENOMEM if running out of memory, or ETIMEDOUT if timed out.
-

The reserved memory is used by mimalloc to satisfy allocations. May quit before max_secs are expired if it estimates it will take more than 1.5 times max_secs. The time limit is needed because on some operating systems it can take a long time to reserve contiguous memory if the physical memory is fragmented.

+

The reserved memory is used by mimalloc to satisfy allocations. May quit before timeout_msecs are expired if it estimates it will take more than 1.5 times timeout_msecs. The time limit is needed because on some operating systems it can take a long time to reserve contiguous memory if the physical memory is fragmented.

+ +
+
+ +

◆ mi_reserve_huge_os_pages_interleave()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
int mi_reserve_huge_os_pages_interleave (size_t pages,
size_t numa_nodes,
size_t timeout_msecs 
)
+
+ +

Reserve pages of huge OS pages (1GiB) evenly divided over numa_nodes nodes, but stops after at most timeout_msecs milliseconds.

+
Parameters
+ + + + +
pagesThe number of 1GiB pages to reserve.
numa_nodesThe number of nodes to evenly divide the pages over, or 0 for using the actual number of NUMA nodes.
timeout_msecsMaximum number of milli-seconds to try reserving, or 0 for no timeout.
+
+
+
Returns
0 if successful, ENOMEM if running out of memory, or ETIMEDOUT if timed out.
+

The reserved memory is used by mimalloc to satisfy allocations. May quit before timeout_msecs are expired if it estimates it will take more than 1.5 times timeout_msecs. The time limit is needed because on some operating systems it can take a long time to reserve contiguous memory if the physical memory is fragmented.

@@ -495,8 +571,8 @@ Functions
- -

◆ mi_stats_print()

+ +

◆ mi_stats_print() [1/2]

@@ -504,7 +580,7 @@ Functions void mi_stats_print ( - mi_output_fun *  + void *  out) @@ -514,7 +590,45 @@ Functions

Print the main statistics.

Parameters
- + +
outOutput function. Use NULL for outputting to stderr.
outIgnored, outputs to the registered output function or stderr by default.
+
+
+

Most detailed when using a debug build.

+ +
+
+ +

◆ mi_stats_print() [2/2]

+ +
+
+ + + + + + + + + + + + + + + + + + +
void mi_stats_print (mi_output_funout,
void * arg 
)
+
+ +

Print the main statistics.

+
Parameters
+ + +
outAn output function or NULL for the default.
argOptional argument passed to out (if not NULL)
@@ -584,18 +698,28 @@ Functions
- -

◆ mi_thread_stats_print()

+ +

◆ mi_thread_stats_print_out()

- + - - + + + + + + + + + + + +
void mi_thread_stats_print void mi_thread_stats_print_out (mi_output_funout)mi_output_funout,
void * arg 
)
@@ -603,7 +727,8 @@ Functions

Print out heap statistics for this thread.

Parameters
- + +
outOutput function. Use NULL for outputting to stderr.
outAn output function or NULL for the default.
argOptional argument passed to out (if not NULL)
diff --git a/docs/group__extended.js b/docs/group__extended.js index 00c73614..7152b518 100644 --- a/docs/group__extended.js +++ b/docs/group__extended.js @@ -1,22 +1,24 @@ var group__extended = [ [ "MI_SMALL_SIZE_MAX", "group__extended.html#ga1ea64283508718d9d645c38efc2f4305", null ], - [ "mi_deferred_free_fun", "group__extended.html#ga22213691c3ce5ab4d91b24aff1023529", null ], - [ "mi_output_fun", "group__extended.html#ga2bed6d40b74591a67f81daea4b4a246f", null ], + [ "mi_deferred_free_fun", "group__extended.html#ga299dae78d25ce112e384a98b7309c5be", null ], + [ "mi_output_fun", "group__extended.html#gad823d23444a4b77a40f66bf075a98a0c", null ], [ "mi_collect", "group__extended.html#ga421430e2226d7d468529cec457396756", null ], [ "mi_good_size", "group__extended.html#gac057927cd06c854b45fe7847e921bd47", null ], [ "mi_is_in_heap_region", "group__extended.html#ga5f071b10d4df1c3658e04e7fd67a94e6", null ], [ "mi_is_redirected", "group__extended.html#gaad25050b19f30cd79397b227e0157a3f", null ], [ "mi_malloc_small", "group__extended.html#ga7136c2e55cb22c98ecf95d08d6debb99", null ], - [ "mi_register_deferred_free", "group__extended.html#ga24dc9cc6fca8daa2aa30aa8025467ce2", null ], - [ "mi_register_output", "group__extended.html#ga84a0c8b401e42eb5b1bce156852f44c5", null ], - [ "mi_reserve_huge_os_pages", "group__extended.html#ga2664f36a2dd557741c429cb799f04641", null ], + [ "mi_register_deferred_free", "group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece", null ], + [ "mi_register_output", "group__extended.html#gae5b17ff027cd2150b43a33040250cf3f", null ], + [ "mi_reserve_huge_os_pages_at", "group__extended.html#ga7795a13d20087447281858d2c771cca1", null ], + [ "mi_reserve_huge_os_pages_interleave", "group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50", null ], [ "mi_stats_merge", "group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1", null ], - [ "mi_stats_print", "group__extended.html#ga8ca07ccff283956d71f48272f4fd5c01", null ], + [ "mi_stats_print", 
"group__extended.html#ga2d126e5c62d3badc35445e5d84166df2", null ], + [ "mi_stats_print", "group__extended.html#ga256cc6f13a142deabbadd954a217e228", null ], [ "mi_stats_reset", "group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99", null ], [ "mi_thread_done", "group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf", null ], [ "mi_thread_init", "group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17", null ], - [ "mi_thread_stats_print", "group__extended.html#ga489670a15d1a257ab4639e645ee4612a", null ], + [ "mi_thread_stats_print_out", "group__extended.html#gab1dac8476c46cb9eecab767eb40c1525", null ], [ "mi_usable_size", "group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee", null ], [ "mi_zalloc_small", "group__extended.html#ga220f29f40a44404b0061c15bc1c31152", null ] ]; \ No newline at end of file diff --git a/docs/group__heap.html b/docs/group__heap.html index 753aaba3..0973279a 100644 --- a/docs/group__heap.html +++ b/docs/group__heap.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/group__malloc.html b/docs/group__malloc.html index 6bd71d06..bee7b4eb 100644 --- a/docs/group__malloc.html +++ b/docs/group__malloc.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/group__options.html b/docs/group__options.html index a34a9307..71c7ba24 100644 --- a/docs/group__options.html +++ b/docs/group__options.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -123,11 +123,12 @@ Enumerations mi_option_segment_cache,
  mi_option_page_reset, -mi_option_cache_reset, -mi_option_reset_decommits, -mi_option_eager_commit_delay, +mi_option_segment_reset, +mi_option_reset_delay, +mi_option_use_numa_nodes,
-  mi_option_segment_reset, +  mi_option_reset_decommits, +mi_option_eager_commit_delay, mi_option_os_tag, _mi_option_last
@@ -183,16 +184,18 @@ Functions mi_option_segment_cache 

The number of segments per thread to keep cached.

-mi_option_page_reset 

Reset page memory when it becomes free.

+mi_option_page_reset 

Reset page memory after mi_option_reset_delay milliseconds when it becomes free.

-mi_option_cache_reset 

Reset segment memory when a segment is cached.

+mi_option_segment_reset 

Experimental.

+ +mi_option_reset_delay 

Delay in milli-seconds before resetting a page (100ms by default)

+ +mi_option_use_numa_nodes 

Pretend there are at most N NUMA nodes.

mi_option_reset_decommits 

Experimental.

mi_option_eager_commit_delay 

Experimental.

-mi_option_segment_reset 

Experimental.

- mi_option_os_tag 

OS tag to assign to mimalloc'd memory.

_mi_option_last  diff --git a/docs/group__options.js b/docs/group__options.js index 4bf52d54..1d84ea8b 100644 --- a/docs/group__options.js +++ b/docs/group__options.js @@ -10,10 +10,11 @@ var group__options = [ "mi_option_reserve_huge_os_pages", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884caca7ed041be3b0b9d0b82432c7bf41af2", null ], [ "mi_option_segment_cache", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca2ecbe7ef32f5c84de3739aa4f0b805a1", null ], [ "mi_option_page_reset", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cada854dd272c66342f18a93ee254a2968", null ], - [ "mi_option_cache_reset", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac2157a0cb79cd996c1db7d9f6a090c07", null ], + [ "mi_option_segment_reset", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d", null ], + [ "mi_option_reset_delay", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca154fe170131d5212cff57e22b99523c5", null ], + [ "mi_option_use_numa_nodes", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0ac33a18f6b659fcfaf44efb0bab1b74", null ], [ "mi_option_reset_decommits", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536", null ], [ "mi_option_eager_commit_delay", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c", null ], - [ "mi_option_segment_reset", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d", null ], [ "mi_option_os_tag", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca4b74ae2a69e445de6c2361b73c1d14bf", null ], [ "_mi_option_last", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca5b4357b74be0d87568036c32eb1a2e4a", null ] ] ], diff --git a/docs/group__posix.html b/docs/group__posix.html index b9cf0b52..65e8ff7e 100644 --- a/docs/group__posix.html +++ b/docs/group__posix.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/group__typed.html b/docs/group__typed.html index 8ea0f095..cf5ac5d1 100644 --- a/docs/group__typed.html +++ b/docs/group__typed.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/group__zeroinit.html b/docs/group__zeroinit.html new file mode 100644 index 00000000..28983138 --- /dev/null +++ b/docs/group__zeroinit.html @@ -0,0 +1,597 @@ + + + + + + + +mi-malloc: Zero initialized re-allocation + + + + + + + + + + + + + + + + +
+
+ + + + + + + + +
+
mi-malloc +  1.4 +
+
+ + + + + + +
+
+
+ + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+ +
+
Zero initialized re-allocation
+
+
+ +

The zero-initialized re-allocations are only valid on memory that was originally allocated with zero initialization too. +More...

+ + + + + + + + + + + + + + + + + + + + + + + + +

+Functions

void * mi_rezalloc (void *p, size_t newsize)
 
void * mi_rezalloc_aligned (void *p, size_t newsize, size_t alignment)
 
void * mi_rezalloc_aligned_at (void *p, size_t newsize, size_t alignment, size_t offset)
 
void * mi_recalloc_aligned (void *p, size_t newcount, size_t size, size_t alignment)
 
void * mi_recalloc_aligned_at (void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
 
void * mi_heap_rezalloc (mi_heap_t *heap, void *p, size_t newsize)
 
void * mi_heap_recalloc (mi_heap_t *heap, void *p, size_t newcount, size_t size)
 
void * mi_heap_rezalloc_aligned (mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
 
void * mi_heap_rezalloc_aligned_at (mi_heap_t *heap, void *p, size_t newsize, size_t alignment, size_t offset)
 
void * mi_heap_recalloc_aligned (mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment)
 
void * mi_heap_recalloc_aligned_at (mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
 
+

Detailed Description

+

The zero-initialized re-allocations are only valid on memory that was originally allocated with zero initialization too.

+

For example: mi_calloc, mi_zalloc, mi_zalloc_aligned, etc. See https://github.com/microsoft/mimalloc/issues/63#issuecomment-508272992

+

Function Documentation

+ +

◆ mi_heap_recalloc()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_heap_recalloc (mi_heap_t * heap,
void * p,
size_t newcount,
size_t size 
)
+
+ +
+
+ +

◆ mi_heap_recalloc_aligned()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_heap_recalloc_aligned (mi_heap_t * heap,
void * p,
size_t newcount,
size_t size,
size_t alignment 
)
+
+ +
+
+ +

◆ mi_heap_recalloc_aligned_at()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_heap_recalloc_aligned_at (mi_heap_t * heap,
void * p,
size_t newcount,
size_t size,
size_t alignment,
size_t offset 
)
+
+ +
+
+ +

◆ mi_heap_rezalloc()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_heap_rezalloc (mi_heap_t * heap,
void * p,
size_t newsize 
)
+
+ +
+
+ +

◆ mi_heap_rezalloc_aligned()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_heap_rezalloc_aligned (mi_heap_t * heap,
void * p,
size_t newsize,
size_t alignment 
)
+
+ +
+
+ +

◆ mi_heap_rezalloc_aligned_at()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_heap_rezalloc_aligned_at (mi_heap_t * heap,
void * p,
size_t newsize,
size_t alignment,
size_t offset 
)
+
+ +
+
+ +

◆ mi_recalloc_aligned()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_recalloc_aligned (void * p,
size_t newcount,
size_t size,
size_t alignment 
)
+
+ +
+
+ +

◆ mi_recalloc_aligned_at()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_recalloc_aligned_at (void * p,
size_t newcount,
size_t size,
size_t alignment,
size_t offset 
)
+
+ +
+
+ +

◆ mi_rezalloc()

+ +
+
+ + + + + + + + + + + + + + + + + + +
void* mi_rezalloc (void * p,
size_t newsize 
)
+
+ +
+
+ +

◆ mi_rezalloc_aligned()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_rezalloc_aligned (void * p,
size_t newsize,
size_t alignment 
)
+
+ +
+
+ +

◆ mi_rezalloc_aligned_at()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_rezalloc_aligned_at (void * p,
size_t newsize,
size_t alignment,
size_t offset 
)
+
+ +
+
+
+
+ + + + diff --git a/docs/group__zeroinit.js b/docs/group__zeroinit.js new file mode 100644 index 00000000..b9297d21 --- /dev/null +++ b/docs/group__zeroinit.js @@ -0,0 +1,14 @@ +var group__zeroinit = +[ + [ "mi_heap_recalloc", "group__zeroinit.html#ga8648c5fbb22a80f0262859099f06dfbd", null ], + [ "mi_heap_recalloc_aligned", "group__zeroinit.html#ga9f3f999396c8f77ca5e80e7b40ac29e3", null ], + [ "mi_heap_recalloc_aligned_at", "group__zeroinit.html#ga496452c96f1de8c500be9fddf52edaf7", null ], + [ "mi_heap_rezalloc", "group__zeroinit.html#gacfad83f14eb5d6a42a497a898e19fc76", null ], + [ "mi_heap_rezalloc_aligned", "group__zeroinit.html#ga375fa8a611c51905e592d5d467c49664", null ], + [ "mi_heap_rezalloc_aligned_at", "group__zeroinit.html#gac90da54fa7e5d10bdc97ce0b51dce2eb", null ], + [ "mi_recalloc_aligned", "group__zeroinit.html#ga3e7e5c291acf1c7fd7ffd9914a9f945f", null ], + [ "mi_recalloc_aligned_at", "group__zeroinit.html#ga4ff5e92ad73585418a072c9d059e5cf9", null ], + [ "mi_rezalloc", "group__zeroinit.html#ga8c292e142110229a2980b37ab036dbc6", null ], + [ "mi_rezalloc_aligned", "group__zeroinit.html#gacd71a7bce96aab38ae6de17af2eb2cf0", null ], + [ "mi_rezalloc_aligned_at", "group__zeroinit.html#gae8b358c417e61d5307da002702b0a8e1", null ] +]; \ No newline at end of file diff --git a/docs/index.html b/docs/index.html index bf758c3c..0efc9c09 100644 --- a/docs/index.html +++ b/docs/index.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -105,7 +105,7 @@ $(document).ready(function(){initNavTree('index.html','');});

This is the API documentation of the mimalloc allocator (pronounced "me-malloc") – a general purpose allocator with excellent performance characteristics. Initially developed by Daan Leijen for the run-time systems of the Koka and Lean languages.

It is a drop-in replacement for malloc and can be used in other programs without code changes. For example, on Unix you can use it as:

> LD_PRELOAD=/usr/bin/libmimalloc.so myprogram

Notable aspects of the design include:

    -
  • small and consistent: the library is less than 3500 LOC using simple and consistent data structures. This makes it very suitable to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic heartbeat and deferred freeing (for bounded worst-case times with reference counting).
  • +
  • small and consistent: the library is less than 6k LOC using simple and consistent data structures. This makes it very suitable to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic heartbeat and deferred freeing (for bounded worst-case times with reference counting).
  • free list sharding: the big idea: instead of one big free list (per size class) we have many smaller lists per memory "page" which both reduces fragmentation and increases locality – things that are allocated close in time get allocated close in memory. (A memory "page" in mimalloc contains blocks of one size class and is usually 64KiB on a 64-bit system).
  • eager page reset: when a "page" becomes empty (with increased chance due to free list sharding) the memory is marked to the OS as unused ("reset" or "purged") reducing (real) memory pressure and fragmentation, especially in long running programs.
  • secure: mimalloc can be built in secure mode, adding guard pages, randomized allocation, encrypted free lists, etc. to protect against various heap vulnerabilities. The performance penalty is only around 3% on average over our benchmarks.
  • diff --git a/docs/mimalloc-doc_8h_source.html b/docs/mimalloc-doc_8h_source.html index 3a235533..c240f151 100644 --- a/docs/mimalloc-doc_8h_source.html +++ b/docs/mimalloc-doc_8h_source.html @@ -37,7 +37,7 @@ Logo
    mi-malloc -  1.0 +  1.4
    @@ -102,35 +102,30 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
    mimalloc-doc.h
-
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 #error "documentation file only!"
9 
10 
81 
85 
89 void mi_free(void* p);
90 
95 void* mi_malloc(size_t size);
96 
101 void* mi_zalloc(size_t size);
102 
112 void* mi_calloc(size_t count, size_t size);
113 
126 void* mi_realloc(void* p, size_t newsize);
127 
138 void* mi_recalloc(void* p, size_t count, size_t size);
139 
153 void* mi_expand(void* p, size_t newsize);
154 
164 void* mi_mallocn(size_t count, size_t size);
165 
175 void* mi_reallocn(void* p, size_t count, size_t size);
176 
193 void* mi_reallocf(void* p, size_t newsize);
194 
195 
204 char* mi_strdup(const char* s);
205 
215 char* mi_strndup(const char* s, size_t n);
216 
229 char* mi_realpath(const char* fname, char* resolved_name);
230 
232 
233 // ------------------------------------------------------
234 // Extended functionality
235 // ------------------------------------------------------
236 
240 
243 #define MI_SMALL_SIZE_MAX (128*sizeof(void*))
244 
252 void* mi_malloc_small(size_t size);
253 
261 void* mi_zalloc_small(size_t size);
262 
277 size_t mi_usable_size(void* p);
278 
288 size_t mi_good_size(size_t size);
289 
297 void mi_collect(bool force);
298 
303 void mi_stats_print(mi_output_fun* out);
304 
306 void mi_stats_reset(void);
307 
309 void mi_stats_merge(void);
310 
314 void mi_thread_init(void);
315 
320 void mi_thread_done(void);
321 
327 
333 typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat);
334 
350 
355 typedef void (mi_output_fun)(const char* msg);
356 
362 void mi_register_output(mi_output_fun* out) mi_attr_noexcept;
363 
368 bool mi_is_in_heap_region(const void* p);
369 
381 int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved);
382 
387 bool mi_is_redirected();
388 
389 
391 
392 // ------------------------------------------------------
393 // Aligned allocation
394 // ------------------------------------------------------
395 
401 
414 void* mi_malloc_aligned(size_t size, size_t alignment);
415 void* mi_zalloc_aligned(size_t size, size_t alignment);
416 void* mi_calloc_aligned(size_t count, size_t size, size_t alignment);
417 void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment);
418 
429 void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset);
430 void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset);
431 void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset);
432 void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
433 
435 
441 
446 struct mi_heap_s;
447 
452 typedef struct mi_heap_s mi_heap_t;
453 
456 
464 void mi_heap_delete(mi_heap_t* heap);
465 
473 void mi_heap_destroy(mi_heap_t* heap);
474 
479 
483 
490 
492 void mi_heap_collect(mi_heap_t* heap, bool force);
493 
496 void* mi_heap_malloc(mi_heap_t* heap, size_t size);
497 
501 void* mi_heap_malloc_small(mi_heap_t* heap, size_t size);
502 
505 void* mi_heap_zalloc(mi_heap_t* heap, size_t size);
506 
509 void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size);
510 
513 void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size);
514 
517 char* mi_heap_strdup(mi_heap_t* heap, const char* s);
518 
521 char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n);
522 
525 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name);
526 
527 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize);
528 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size);
529 void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize);
530 
531 void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
532 void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
533 void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
534 void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
535 void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment);
536 void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset);
537 void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
538 void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
539 
541 
542 
551 
552 void* mi_rezalloc(void* p, size_t newsize);
553 void* mi_recalloc(void* p, size_t newcount, size_t size) ;
554 
555 void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment);
556 void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
557 void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment);
558 void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
559 
560 void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize);
561 void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size);
562 
563 void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
564 void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
565 void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment);
566 void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
567 
569 
575 
587 #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
588 
590 #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
591 
593 #define mi_calloc_tp(tp,count) ((tp*)mi_calloc(count,sizeof(tp)))
594 
596 #define mi_mallocn_tp(tp,count) ((tp*)mi_mallocn(count,sizeof(tp)))
597 
599 #define mi_reallocn_tp(p,tp,count) ((tp*)mi_reallocn(p,count,sizeof(tp)))
600 
602 #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
603 
605 #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
606 
608 #define mi_heap_calloc_tp(hp,tp,count) ((tp*)mi_heap_calloc(hp,count,sizeof(tp)))
609 
611 #define mi_heap_mallocn_tp(hp,tp,count) ((tp*)mi_heap_mallocn(hp,count,sizeof(tp)))
612 
614 #define mi_heap_reallocn_tp(hp,p,tp,count) ((tp*)mi_heap_reallocn(p,count,sizeof(tp)))
615 
617 #define mi_heap_recalloc_tp(hp,p,tp,count) ((tp*)mi_heap_recalloc(p,count,sizeof(tp)))
618 
620 
626 
633 bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
634 
643 bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
644 
652 bool mi_check_owned(const void* p);
653 
656 typedef struct mi_heap_area_s {
657  void* blocks;
658  size_t reserved;
659  size_t committed;
660  size_t used;
661  size_t block_size;
663 
671 typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
672 
684 bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
685 
687 
693 
695 typedef enum mi_option_e {
696  // stable options
700  // the following options are experimental
713 } mi_option_t;
714 
715 
716 bool mi_option_enabled(mi_option_t option);
717 void mi_option_enable(mi_option_t option, bool enable);
718 void mi_option_enable_default(mi_option_t option, bool enable);
719 
720 long mi_option_get(mi_option_t option);
721 void mi_option_set(mi_option_t option, long value);
722 void mi_option_set_default(mi_option_t option, long value);
723 
724 
726 
733 
734 void* mi_recalloc(void* p, size_t count, size_t size);
735 size_t mi_malloc_size(const void* p);
736 size_t mi_malloc_usable_size(const void *p);
737 
739 void mi_cfree(void* p);
740 
741 int mi_posix_memalign(void** p, size_t alignment, size_t size);
742 int mi__posix_memalign(void** p, size_t alignment, size_t size);
743 void* mi_memalign(size_t alignment, size_t size);
744 void* mi_valloc(size_t size);
745 
746 void* mi_pvalloc(size_t size);
747 void* mi_aligned_alloc(size_t alignment, size_t size);
748 void* mi_reallocarray(void* p, size_t count, size_t size);
749 
750 void mi_free_size(void* p, size_t size);
751 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
752 void mi_free_aligned(void* p, size_t alignment);
753 
755 void* mi_new(std::size_t n) noexcept(false);
756 
758 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
759 
761 void* mi_new_nothrow(size_t n);
762 ``
764 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
765 
767 
void mi_option_enable_default(mi_option_t option, bool enable)
+
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 #error "documentation file only!"
9 
10 
81 
85 
89 void mi_free(void* p);
90 
95 void* mi_malloc(size_t size);
96 
101 void* mi_zalloc(size_t size);
102 
112 void* mi_calloc(size_t count, size_t size);
113 
126 void* mi_realloc(void* p, size_t newsize);
127 
138 void* mi_recalloc(void* p, size_t count, size_t size);
139 
153 void* mi_expand(void* p, size_t newsize);
154 
164 void* mi_mallocn(size_t count, size_t size);
165 
175 void* mi_reallocn(void* p, size_t count, size_t size);
176 
193 void* mi_reallocf(void* p, size_t newsize);
194 
195 
204 char* mi_strdup(const char* s);
205 
215 char* mi_strndup(const char* s, size_t n);
216 
229 char* mi_realpath(const char* fname, char* resolved_name);
230 
232 
233 // ------------------------------------------------------
234 // Extended functionality
235 // ------------------------------------------------------
236 
240 
243 #define MI_SMALL_SIZE_MAX (128*sizeof(void*))
244 
252 void* mi_malloc_small(size_t size);
253 
261 void* mi_zalloc_small(size_t size);
262 
277 size_t mi_usable_size(void* p);
278 
288 size_t mi_good_size(size_t size);
289 
297 void mi_collect(bool force);
298 
303 void mi_stats_print(void* out);
304 
310 void mi_stats_print(mi_output_fun* out, void* arg);
311 
313 void mi_stats_reset(void);
314 
316 void mi_stats_merge(void);
317 
321 void mi_thread_init(void);
322 
327 void mi_thread_done(void);
328 
334 void mi_thread_stats_print_out(mi_output_fun* out, void* arg);
335 
342 typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
343 
359 void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg);
360 
366 typedef void (mi_output_fun)(const char* msg, void* arg);
367 
374 void mi_register_output(mi_output_fun* out, void* arg);
375 
380 bool mi_is_in_heap_region(const void* p);
381 
382 
395 int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs);
396 
409 int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs);
410 
411 
416 bool mi_is_redirected();
417 
418 
420 
421 // ------------------------------------------------------
422 // Aligned allocation
423 // ------------------------------------------------------
424 
430 
443 void* mi_malloc_aligned(size_t size, size_t alignment);
444 void* mi_zalloc_aligned(size_t size, size_t alignment);
445 void* mi_calloc_aligned(size_t count, size_t size, size_t alignment);
446 void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment);
447 
458 void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset);
459 void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset);
460 void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset);
461 void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
462 
464 
470 
475 struct mi_heap_s;
476 
481 typedef struct mi_heap_s mi_heap_t;
482 
485 
493 void mi_heap_delete(mi_heap_t* heap);
494 
502 void mi_heap_destroy(mi_heap_t* heap);
503 
508 
512 
519 
521 void mi_heap_collect(mi_heap_t* heap, bool force);
522 
525 void* mi_heap_malloc(mi_heap_t* heap, size_t size);
526 
530 void* mi_heap_malloc_small(mi_heap_t* heap, size_t size);
531 
534 void* mi_heap_zalloc(mi_heap_t* heap, size_t size);
535 
538 void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size);
539 
542 void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size);
543 
546 char* mi_heap_strdup(mi_heap_t* heap, const char* s);
547 
550 char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n);
551 
554 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name);
555 
556 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize);
557 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size);
558 void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize);
559 
560 void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
561 void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
562 void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
563 void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
564 void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment);
565 void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset);
566 void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
567 void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
568 
570 
571 
580 
581 void* mi_rezalloc(void* p, size_t newsize);
582 void* mi_recalloc(void* p, size_t newcount, size_t size) ;
583 
584 void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment);
585 void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
586 void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment);
587 void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
588 
589 void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize);
590 void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size);
591 
592 void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
593 void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
594 void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment);
595 void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
596 
598 
604 
616 #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
617 
619 #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
620 
622 #define mi_calloc_tp(tp,count) ((tp*)mi_calloc(count,sizeof(tp)))
623 
625 #define mi_mallocn_tp(tp,count) ((tp*)mi_mallocn(count,sizeof(tp)))
626 
628 #define mi_reallocn_tp(p,tp,count) ((tp*)mi_reallocn(p,count,sizeof(tp)))
629 
631 #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
632 
634 #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
635 
637 #define mi_heap_calloc_tp(hp,tp,count) ((tp*)mi_heap_calloc(hp,count,sizeof(tp)))
638 
640 #define mi_heap_mallocn_tp(hp,tp,count) ((tp*)mi_heap_mallocn(hp,count,sizeof(tp)))
641 
643 #define mi_heap_reallocn_tp(hp,p,tp,count) ((tp*)mi_heap_reallocn(p,count,sizeof(tp)))
644 
646 #define mi_heap_recalloc_tp(hp,p,tp,count) ((tp*)mi_heap_recalloc(p,count,sizeof(tp)))
647 
649 
655 
662 bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
663 
672 bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
673 
681 bool mi_check_owned(const void* p);
682 
685 typedef struct mi_heap_area_s {
686  void* blocks;
687  size_t reserved;
688  size_t committed;
689  size_t used;
690  size_t block_size;
692 
700 typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
701 
713 bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
714 
716 
722 
724 typedef enum mi_option_e {
725  // stable options
729  // the following options are experimental
743 } mi_option_t;
744 
745 
746 bool mi_option_enabled(mi_option_t option);
747 void mi_option_enable(mi_option_t option, bool enable);
748 void mi_option_enable_default(mi_option_t option, bool enable);
749 
750 long mi_option_get(mi_option_t option);
751 void mi_option_set(mi_option_t option, long value);
752 void mi_option_set_default(mi_option_t option, long value);
753 
754 
756 
763 
764 void* mi_recalloc(void* p, size_t count, size_t size);
765 size_t mi_malloc_size(const void* p);
766 size_t mi_malloc_usable_size(const void *p);
767 
769 void mi_cfree(void* p);
770 
771 int mi_posix_memalign(void** p, size_t alignment, size_t size);
772 int mi__posix_memalign(void** p, size_t alignment, size_t size);
773 void* mi_memalign(size_t alignment, size_t size);
774 void* mi_valloc(size_t size);
775 
776 void* mi_pvalloc(size_t size);
777 void* mi_aligned_alloc(size_t alignment, size_t size);
778 void* mi_reallocarray(void* p, size_t count, size_t size);
779 
780 void mi_free_size(void* p, size_t size);
781 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
782 void mi_free_aligned(void* p, size_t alignment);
783 
785 void* mi_new(std::size_t n) noexcept(false);
786 
788 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
789 
791 void* mi_new_nothrow(size_t n);
792 ``
794 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
795 
797 
void mi_option_enable_default(mi_option_t option, bool enable)
size_t mi_usable_size(void *p)
Return the available bytes in a memory block.
void * mi_reallocn(void *p, size_t count, size_t size)
Re-allocate memory to count elements of size bytes.
void * mi_malloc_aligned(size_t size, size_t alignment)
Allocate size bytes aligned by alignment.
void * mi_recalloc_aligned_at(void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
-
void mi_stats_print(mi_output_fun *out)
Print the main statistics.
void mi_stats_reset(void)
Reset statistics.
void * mi_heap_realloc_aligned(mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
void * mi_recalloc(void *p, size_t count, size_t size)
Re-allocate memory to count elements of size bytes, with extra memory initialized to zero.
void * mi_mallocn(size_t count, size_t size)
Allocate count elements of size bytes.
size_t mi_malloc_size(const void *p)
-
Reset segment memory when a segment is cached.
Definition: mimalloc-doc.h:707
int mi_posix_memalign(void **p, size_t alignment, size_t size)
void mi_stats_merge(void)
Merge thread local statistics with the main statistics and reset.
-
void() mi_output_fun(const char *msg)
Type of output functions.
Definition: mimalloc-doc.h:355
-
void mi_register_output(mi_output_fun *out) mi_attr_noexcept
Register an output function.
void mi_option_set_default(mi_option_t option, long value)
void * mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false)
raise std::bad_alloc exception on failure.
void * mi_rezalloc(void *p, size_t newsize)
-
Eagerly commit segments (4MiB) (enabled by default).
Definition: mimalloc-doc.h:701
+
Eagerly commit segments (4MiB) (enabled by default).
Definition: mimalloc-doc.h:730
void * mi_heap_zalloc(mi_heap_t *heap, size_t size)
Allocate zero-initialized in a specific heap.
void mi_option_set(mi_option_t option, long value)
-
void mi_register_deferred_free(mi_deferred_free_fun *deferred_free)
Register a deferred free function.
-
Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
Definition: mimalloc-doc.h:702
+
Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
Definition: mimalloc-doc.h:731
void mi_cfree(void *p)
Just as free but also checks if the pointer p belongs to our heap.
void * mi_recalloc_aligned(void *p, size_t newcount, size_t size, size_t alignment)
-
Definition: mimalloc-doc.h:712
+
Definition: mimalloc-doc.h:742
void * mi_realloc_aligned_at(void *p, size_t newsize, size_t alignment, size_t offset)
-
void * blocks
start of the area containing heap blocks
Definition: mimalloc-doc.h:657
+
void * blocks
start of the area containing heap blocks
Definition: mimalloc-doc.h:686
void * mi_realloc_aligned(void *p, size_t newsize, size_t alignment)
int mi__posix_memalign(void **p, size_t alignment, size_t size)
void mi_free(void *p)
Free previously allocated memory.
@@ -146,35 +141,36 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
void * mi_heap_rezalloc_aligned_at(mi_heap_t *heap, void *p, size_t newsize, size_t alignment, size_t offset)
void * mi_zalloc(size_t size)
Allocate zero-initialized size bytes.
void * mi_heap_rezalloc(mi_heap_t *heap, void *p, size_t newsize)
-
The number of segments per thread to keep cached.
Definition: mimalloc-doc.h:705
+
The number of segments per thread to keep cached.
Definition: mimalloc-doc.h:734
void * mi_heap_calloc(mi_heap_t *heap, size_t count, size_t size)
Allocate count zero-initialized elements in a specific heap.
void * mi_new(std::size_t n) noexcept(false)
raise std::bad_alloc exception on failure.
void * mi_heap_calloc_aligned(mi_heap_t *heap, size_t count, size_t size, size_t alignment)
bool mi_is_redirected()
Is the C runtime malloc API redirected?
-
size_t block_size
size in bytes of one block
Definition: mimalloc-doc.h:661
+
size_t block_size
size in bytes of one block
Definition: mimalloc-doc.h:690
void * mi_reallocarray(void *p, size_t count, size_t size)
+
int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs)
Reserve pages of huge OS pages (1GiB) evenly divided over numa_nodes nodes, but stops after at most t...
+
void() mi_deferred_free_fun(bool force, unsigned long long heartbeat, void *arg)
Type of deferred free functions.
Definition: mimalloc-doc.h:342
bool mi_is_in_heap_region(const void *p)
Is a pointer part of our heap?
void mi_option_enable(mi_option_t option, bool enable)
void * mi_realloc(void *p, size_t newsize)
Re-allocate memory to newsize bytes.
-
The number of huge OS pages (1GiB in size) to reserve at the start of the program.
Definition: mimalloc-doc.h:704
-
int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t *pages_reserved)
Reserve pages of huge OS pages (1GiB) but stops after at most max_secs seconds.
+
The number of huge OS pages (1GiB in size) to reserve at the start of the program.
Definition: mimalloc-doc.h:733
void * mi_heap_reallocf(mi_heap_t *heap, void *p, size_t newsize)
void mi_free_size_aligned(void *p, size_t size, size_t alignment)
void * mi_rezalloc_aligned_at(void *p, size_t newsize, size_t alignment, size_t offset)
-
Reset page memory when it becomes free.
Definition: mimalloc-doc.h:706
+
Reset page memory after mi_option_reset_delay milliseconds when it becomes free.
Definition: mimalloc-doc.h:735
void mi_thread_done(void)
Uninitialize mimalloc on a thread.
bool mi_heap_visit_blocks(const mi_heap_t *heap, bool visit_all_blocks, mi_block_visit_fun *visitor, void *arg)
Visit all areas and blocks in a heap.
-
void mi_thread_stats_print(mi_output_fun *out)
Print out heap statistics for this thread.
+
Pretend there are at most N NUMA nodes.
Definition: mimalloc-doc.h:738
void * mi_malloc(size_t size)
Allocate size bytes.
bool mi_option_enabled(mi_option_t option)
-
Experimental.
Definition: mimalloc-doc.h:708
+
Experimental.
Definition: mimalloc-doc.h:739
char * mi_heap_strndup(mi_heap_t *heap, const char *s, size_t n)
Duplicate a string of at most length n in a specific heap.
-
bool() mi_block_visit_fun(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *arg)
Visitor function passed to mi_heap_visit_blocks()
Definition: mimalloc-doc.h:671
+
bool() mi_block_visit_fun(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *arg)
Visitor function passed to mi_heap_visit_blocks()
Definition: mimalloc-doc.h:700
void * mi_heap_recalloc(mi_heap_t *heap, void *p, size_t newcount, size_t size)
void * mi_heap_malloc_aligned_at(mi_heap_t *heap, size_t size, size_t alignment, size_t offset)
char * mi_realpath(const char *fname, char *resolved_name)
Resolve a file path name.
-
Print error messages to stderr.
Definition: mimalloc-doc.h:698
-
Experimental.
Definition: mimalloc-doc.h:710
+
Print error messages to stderr.
Definition: mimalloc-doc.h:727
+
Experimental.
Definition: mimalloc-doc.h:736
void * mi_heap_rezalloc_aligned(mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
void * mi_memalign(size_t alignment, size_t size)
void * mi_new_aligned_nothrow(size_t n, size_t alignment)
return NULL on failure.
@@ -183,35 +179,40 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
bool mi_heap_contains_block(mi_heap_t *heap, const void *p)
Does a heap contain a pointer to a previously allocated block?
void mi_heap_collect(mi_heap_t *heap, bool force)
Release outstanding resources in a specific heap.
void * mi_heap_recalloc_aligned_at(mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
-
Print verbose messages to stderr.
Definition: mimalloc-doc.h:699
+
Print verbose messages to stderr.
Definition: mimalloc-doc.h:728
void * mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset)
void * mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset)
Allocate size bytes aligned by alignment at a specified offset.
void mi_heap_delete(mi_heap_t *heap)
Delete a previously allocated heap.
-
OS tag to assign to mimalloc'd memory.
Definition: mimalloc-doc.h:711
+
OS tag to assign to mimalloc'd memory.
Definition: mimalloc-doc.h:741
mi_heap_t * mi_heap_get_default()
Get the default heap that is used for mi_malloc() et al.
+
int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs)
Reserve pages of huge OS pages (1GiB) at a specific numa_node, but stops after at most timeout_msecs ...
void * mi_aligned_alloc(size_t alignment, size_t size)
void * mi_valloc(size_t size)
void mi_thread_init(void)
Initialize mimalloc on a thread.
size_t mi_good_size(size_t size)
Return the used allocation size.
-
Experimental.
Definition: mimalloc-doc.h:709
+
void mi_stats_print(void *out)
Print the main statistics.
+
Experimental.
Definition: mimalloc-doc.h:740
void * mi_heap_recalloc_aligned(mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment)
void * mi_heap_mallocn(mi_heap_t *heap, size_t count, size_t size)
Allocate count elements in a specific heap.
-
An area of heap space contains blocks of a single size.
Definition: mimalloc-doc.h:656
-
Print statistics to stderr when the program is done.
Definition: mimalloc-doc.h:697
+
An area of heap space contains blocks of a single size.
Definition: mimalloc-doc.h:685
+
void mi_thread_stats_print_out(mi_output_fun *out, void *arg)
Print out heap statistics for this thread.
+
Print statistics to stderr when the program is done.
Definition: mimalloc-doc.h:726
void * mi_zalloc_aligned(size_t size, size_t alignment)
-
size_t reserved
bytes reserved for this area
Definition: mimalloc-doc.h:658
-
struct mi_heap_s mi_heap_t
Type of first-class heaps.
Definition: mimalloc-doc.h:452
-
size_t used
bytes in use by allocated blocks
Definition: mimalloc-doc.h:660
-
void() mi_deferred_free_fun(bool force, unsigned long long heartbeat)
Type of deferred free functions.
Definition: mimalloc-doc.h:333
+
size_t reserved
bytes reserved for this area
Definition: mimalloc-doc.h:687
+
struct mi_heap_s mi_heap_t
Type of first-class heaps.
Definition: mimalloc-doc.h:481
+
size_t used
bytes in use by allocated blocks
Definition: mimalloc-doc.h:689
+
void mi_register_deferred_free(mi_deferred_free_fun *deferred_free, void *arg)
Register a deferred free function.
void mi_free_size(void *p, size_t size)
void mi_collect(bool force)
Eagerly free memory.
void mi_heap_destroy(mi_heap_t *heap)
Destroy a heap, freeing all its still allocated blocks.
void * mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset)
-
Use large OS pages (2MiB in size) if possible.
Definition: mimalloc-doc.h:703
+
Use large OS pages (2MiB in size) if possible.
Definition: mimalloc-doc.h:732
void * mi_heap_reallocn(mi_heap_t *heap, void *p, size_t count, size_t size)
+
void mi_register_output(mi_output_fun *out, void *arg)
Register an output function.
void * mi_heap_malloc_small(mi_heap_t *heap, size_t size)
Allocate a small object in a specific heap.
void * mi_heap_realloc(mi_heap_t *heap, void *p, size_t newsize)
size_t mi_malloc_usable_size(const void *p)
+
void() mi_output_fun(const char *msg, void *arg)
Type of output functions.
Definition: mimalloc-doc.h:366
char * mi_strdup(const char *s)
Allocate and duplicate a string.
void * mi_heap_realloc_aligned_at(mi_heap_t *heap, void *p, size_t newsize, size_t alignment, size_t offset)
void * mi_reallocf(void *p, size_t newsize)
Re-allocate memory to newsize bytes.
@@ -223,10 +224,11 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
long mi_option_get(mi_option_t option)
mi_heap_t * mi_heap_get_backing()
Get the backing heap.
void mi_free_aligned(void *p, size_t alignment)
+
Delay in milliseconds before resetting a page (100ms by default)
Definition: mimalloc-doc.h:737
mi_heap_t * mi_heap_new()
Create a new heap that can be used for allocation.
void * mi_heap_malloc(mi_heap_t *heap, size_t size)
Allocate in a specific heap.
-
size_t committed
current committed bytes of this area
Definition: mimalloc-doc.h:659
-
mi_option_t
Runtime options.
Definition: mimalloc-doc.h:695
+
size_t committed
current committed bytes of this area
Definition: mimalloc-doc.h:688
+
mi_option_t
Runtime options.
Definition: mimalloc-doc.h:724
bool mi_heap_check_owned(mi_heap_t *heap, const void *p)
Check safely if any pointer is part of a heap.
mi_heap_t * mi_heap_set_default(mi_heap_t *heap)
Set the default heap to use for mi_malloc() et al.
diff --git a/docs/modules.html b/docs/modules.html index ca18e1eb..0bc6036d 100644 --- a/docs/modules.html +++ b/docs/modules.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/navtreeindex0.js b/docs/navtreeindex0.js index 90be7d78..d1b0e072 100644 --- a/docs/navtreeindex0.js +++ b/docs/navtreeindex0.js @@ -29,25 +29,27 @@ var NAVTREEINDEX0 = "group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65":[5,6,1], "group__analysis.html#structmi__heap__area__t":[5,6,0], "group__extended.html":[5,1], -"group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee":[5,1,17], -"group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf":[5,1,14], +"group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee":[5,1,19], +"group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf":[5,1,16], "group__extended.html#ga1ea64283508718d9d645c38efc2f4305":[5,1,0], -"group__extended.html#ga220f29f40a44404b0061c15bc1c31152":[5,1,18], -"group__extended.html#ga22213691c3ce5ab4d91b24aff1023529":[5,1,1], -"group__extended.html#ga24dc9cc6fca8daa2aa30aa8025467ce2":[5,1,8], -"group__extended.html#ga2664f36a2dd557741c429cb799f04641":[5,1,10], -"group__extended.html#ga2bed6d40b74591a67f81daea4b4a246f":[5,1,2], -"group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99":[5,1,13], +"group__extended.html#ga220f29f40a44404b0061c15bc1c31152":[5,1,20], +"group__extended.html#ga256cc6f13a142deabbadd954a217e228":[5,1,14], +"group__extended.html#ga299dae78d25ce112e384a98b7309c5be":[5,1,1], +"group__extended.html#ga2d126e5c62d3badc35445e5d84166df2":[5,1,13], +"group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50":[5,1,11], +"group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece":[5,1,8], +"group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99":[5,1,15], "group__extended.html#ga421430e2226d7d468529cec457396756":[5,1,3], -"group__extended.html#ga489670a15d1a257ab4639e645ee4612a":[5,1,16], "group__extended.html#ga5f071b10d4df1c3658e04e7fd67a94e6":[5,1,5], "group__extended.html#ga7136c2e55cb22c98ecf95d08d6debb99":[5,1,7], -"group__extended.html#ga84a0c8b401e42eb5b1bce156852f44c5":[5,1,9], -"group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1":[5,1,11], 
-"group__extended.html#ga8ca07ccff283956d71f48272f4fd5c01":[5,1,12], +"group__extended.html#ga7795a13d20087447281858d2c771cca1":[5,1,10], +"group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1":[5,1,12], "group__extended.html#gaad25050b19f30cd79397b227e0157a3f":[5,1,6], +"group__extended.html#gab1dac8476c46cb9eecab767eb40c1525":[5,1,18], "group__extended.html#gac057927cd06c854b45fe7847e921bd47":[5,1,4], -"group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17":[5,1,15], +"group__extended.html#gad823d23444a4b77a40f66bf075a98a0c":[5,1,2], +"group__extended.html#gae5b17ff027cd2150b43a33040250cf3f":[5,1,9], +"group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17":[5,1,17], "group__heap.html":[5,3], "group__heap.html#ga00e95ba1e01acac3cfd95bb7a357a6f0":[5,3,20], "group__heap.html#ga08ca6419a5c057a4d965868998eef487":[5,3,3], @@ -99,19 +101,20 @@ var NAVTREEINDEX0 = "group__options.html#gaf84921c32375e25754dc2ee6a911fa60":[5,7,5], "group__options.html#gafebf7ed116adb38ae5218bc3ce06884c":[5,7,0], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0957ef73b2550764b4840edf48422fda":[5,7,0,0], -"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c":[5,7,0,11], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0ac33a18f6b659fcfaf44efb0bab1b74":[5,7,0,11], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca154fe170131d5212cff57e22b99523c5":[5,7,0,10], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c":[5,7,0,13], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca1e8de72c93da7ff22d91e1e27b52ac2b":[5,7,0,3], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca2ecbe7ef32f5c84de3739aa4f0b805a1":[5,7,0,7], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca32ce97ece29f69e82579679cf8a307ad":[5,7,0,4], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca4192d491200d0055df0554d4cf65054e":[5,7,0,5], 
-"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca4b74ae2a69e445de6c2361b73c1d14bf":[5,7,0,13], -"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca5b4357b74be0d87568036c32eb1a2e4a":[5,7,0,14], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca4b74ae2a69e445de6c2361b73c1d14bf":[5,7,0,14], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca5b4357b74be0d87568036c32eb1a2e4a":[5,7,0,15], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca7c8b7bf5281c581bad64f5daa6442777":[5,7,0,2], -"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac2157a0cb79cd996c1db7d9f6a090c07":[5,7,0,9], -"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536":[5,7,0,10], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536":[5,7,0,12], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884caca7ed041be3b0b9d0b82432c7bf41af2":[5,7,0,6], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cada854dd272c66342f18a93ee254a2968":[5,7,0,8], -"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d":[5,7,0,12], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d":[5,7,0,9], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafbf4822e5c00732c5984b32a032837f0":[5,7,0,1], "group__posix.html":[5,8], "group__posix.html#ga06d07cf357bbac5c73ba5d0c0c421e17":[5,8,7], diff --git a/docs/overrides.html b/docs/overrides.html index 74ef9dbd..3b5d9bd3 100644 --- a/docs/overrides.html +++ b/docs/overrides.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -118,10 +118,10 @@ $(document).ready(function(){initNavTree('overrides.html','');});

Note that certain security restrictions may apply when doing this from the shell.

Note: unfortunately, at this time, dynamic overriding on macOS seems broken, but a fix is actively being worked on (see issue #50).

Windows

-

On Windows you need to link your program explicitly with the mimalloc DLL and use the C-runtime library as a DLL (using the /MD or /MDd switch). Moreover, you need to ensure the mimalloc-redirect.dll (or mimalloc-redirect32.dll) is available in the same folder as the mimalloc DLL at runtime (as it as referred to by the mimalloc DLL). The redirection DLL's ensure all calls to the C runtime malloc API get redirected to mimalloc.

-

To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the main function, like mi_version() (or use the /INCLUDE:mi_version switch on the linker). See the mimalloc-override-test project for an example on how to use this.

-

The environment variable MIMALLOC_DISABLE_REDIRECT=1 can be used to disable dynamic overriding at run-time. Use MIMALLOC_VERBOSE=1 to check if mimalloc successfully redirected.

-

(Note: in principle, it should be possible to patch existing executables that are linked with the dynamic C runtime (ucrtbase.dll) by just putting the mimalloc DLL into the import table (and putting mimalloc-redirect.dll in the same folder) Such patching can be done for example with CFF Explorer).

+

Overriding on Windows is robust but requires that you link your program explicitly with the mimalloc DLL and use the C-runtime library as a DLL (using the /MD or /MDd switch). Moreover, you need to ensure the mimalloc-redirect.dll (or mimalloc-redirect32.dll) is available in the same folder as the main mimalloc-override.dll at runtime (as it is a dependency). The redirection DLL ensures that all calls to the C runtime malloc API get redirected to mimalloc (in mimalloc-override.dll).

+

To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the main function, like mi_version() (or use the /INCLUDE:mi_version switch on the linker). See the mimalloc-override-test project for an example on how to use this. For best performance on Windows with C++, it is highly recommended to also override the new/delete operations (by including mimalloc-new-delete.h in a single(!) source file in your project).

+

The environment variable MIMALLOC_DISABLE_REDIRECT=1 can be used to disable dynamic overriding at run-time. Use MIMALLOC_VERBOSE=1 to check if mimalloc was successfully redirected.

+

(Note: in principle, it is possible to patch existing executables that are linked with the dynamic C runtime (ucrtbase.dll) by just putting the mimalloc-override.dll into the import table (and putting mimalloc-redirect.dll in the same folder). Such patching can be done, for example, with CFF Explorer.)

Static override

On Unix systems, you can also statically link with mimalloc to override the standard malloc interface. The recommended way is to link the final program with the mimalloc single object file (mimalloc-override.o). We use an object file instead of a library file as linkers give preference to that over archives to resolve symbols. To ensure that the standard malloc interface resolves to the mimalloc library, link it as the first object file. For example:

gcc -o myprogram mimalloc-override.o myfile1.c ...

List of Overrides:

diff --git a/docs/pages.html b/docs/pages.html index d0ee9f7a..ad5549bf 100644 --- a/docs/pages.html +++ b/docs/pages.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/search/all_6.js b/docs/search/all_6.js index 2edb9986..cc7a26ec 100644 --- a/docs/search/all_6.js +++ b/docs/search/all_6.js @@ -10,7 +10,7 @@ var searchData= ['mi_5fcfree',['mi_cfree',['../group__posix.html#ga705dc7a64bffacfeeb0141501a5c35d7',1,'mimalloc-doc.h']]], ['mi_5fcheck_5fowned',['mi_check_owned',['../group__analysis.html#ga628c237489c2679af84a4d0d143b3dd5',1,'mimalloc-doc.h']]], ['mi_5fcollect',['mi_collect',['../group__extended.html#ga421430e2226d7d468529cec457396756',1,'mimalloc-doc.h']]], - ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga22213691c3ce5ab4d91b24aff1023529',1,'mimalloc-doc.h']]], + ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga299dae78d25ce112e384a98b7309c5be',1,'mimalloc-doc.h']]], ['mi_5fexpand',['mi_expand',['../group__malloc.html#gaaee66a1d483c3e28f585525fb96707e4',1,'mimalloc-doc.h']]], ['mi_5ffree',['mi_free',['../group__malloc.html#gaf2c7b89c327d1f60f59e68b9ea644d95',1,'mimalloc-doc.h']]], ['mi_5ffree_5faligned',['mi_free_aligned',['../group__posix.html#ga0d28d5cf61e6bfbb18c63092939fe5c9',1,'mimalloc-doc.h']]], @@ -76,7 +76,6 @@ var searchData= ['mi_5fnew_5faligned',['mi_new_aligned',['../group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__posix.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], - ['mi_5foption_5fcache_5freset',['mi_option_cache_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac2157a0cb79cd996c1db7d9f6a090c07',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit',['mi_option_eager_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca1e8de72c93da7ff22d91e1e27b52ac2b',1,'mimalloc-doc.h']]], 
['mi_5foption_5feager_5fcommit_5fdelay',['mi_option_eager_commit_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fregion_5fcommit',['mi_option_eager_region_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca32ce97ece29f69e82579679cf8a307ad',1,'mimalloc-doc.h']]], @@ -89,6 +88,7 @@ var searchData= ['mi_5foption_5fpage_5freset',['mi_option_page_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cada854dd272c66342f18a93ee254a2968',1,'mimalloc-doc.h']]], ['mi_5foption_5freserve_5fhuge_5fos_5fpages',['mi_option_reserve_huge_os_pages',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884caca7ed041be3b0b9d0b82432c7bf41af2',1,'mimalloc-doc.h']]], ['mi_5foption_5freset_5fdecommits',['mi_option_reset_decommits',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536',1,'mimalloc-doc.h']]], + ['mi_5foption_5freset_5fdelay',['mi_option_reset_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca154fe170131d5212cff57e22b99523c5',1,'mimalloc-doc.h']]], ['mi_5foption_5fsegment_5fcache',['mi_option_segment_cache',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca2ecbe7ef32f5c84de3739aa4f0b805a1',1,'mimalloc-doc.h']]], ['mi_5foption_5fsegment_5freset',['mi_option_segment_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d',1,'mimalloc-doc.h']]], ['mi_5foption_5fset',['mi_option_set',['../group__options.html#gaf84921c32375e25754dc2ee6a911fa60',1,'mimalloc-doc.h']]], @@ -96,8 +96,9 @@ var searchData= ['mi_5foption_5fshow_5ferrors',['mi_option_show_errors',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafbf4822e5c00732c5984b32a032837f0',1,'mimalloc-doc.h']]], 
['mi_5foption_5fshow_5fstats',['mi_option_show_stats',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0957ef73b2550764b4840edf48422fda',1,'mimalloc-doc.h']]], ['mi_5foption_5ft',['mi_option_t',['../group__options.html#gafebf7ed116adb38ae5218bc3ce06884c',1,'mimalloc-doc.h']]], + ['mi_5foption_5fuse_5fnuma_5fnodes',['mi_option_use_numa_nodes',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0ac33a18f6b659fcfaf44efb0bab1b74',1,'mimalloc-doc.h']]], ['mi_5foption_5fverbose',['mi_option_verbose',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca7c8b7bf5281c581bad64f5daa6442777',1,'mimalloc-doc.h']]], - ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#ga2bed6d40b74591a67f81daea4b4a246f',1,'mimalloc-doc.h']]], + ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#gad823d23444a4b77a40f66bf075a98a0c',1,'mimalloc-doc.h']]], ['mi_5fposix_5fmemalign',['mi_posix_memalign',['../group__posix.html#gacff84f226ba9feb2031b8992e5579447',1,'mimalloc-doc.h']]], ['mi_5fpvalloc',['mi_pvalloc',['../group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e',1,'mimalloc-doc.h']]], ['mi_5frealloc',['mi_realloc',['../group__malloc.html#gaf11eb497da57bdfb2de65eb191c69db6',1,'mimalloc-doc.h']]], @@ -111,21 +112,22 @@ var searchData= ['mi_5frecalloc',['mi_recalloc',['../group__malloc.html#ga23a0fbb452b5dce8e31fab1a1958cacc',1,'mimalloc-doc.h']]], ['mi_5frecalloc_5faligned',['mi_recalloc_aligned',['../group__zeroinit.html#ga3e7e5c291acf1c7fd7ffd9914a9f945f',1,'mimalloc-doc.h']]], ['mi_5frecalloc_5faligned_5fat',['mi_recalloc_aligned_at',['../group__zeroinit.html#ga4ff5e92ad73585418a072c9d059e5cf9',1,'mimalloc-doc.h']]], - ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga24dc9cc6fca8daa2aa30aa8025467ce2',1,'mimalloc-doc.h']]], - ['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#ga84a0c8b401e42eb5b1bce156852f44c5',1,'mimalloc-doc.h']]], - 
['mi_5freserve_5fhuge_5fos_5fpages',['mi_reserve_huge_os_pages',['../group__extended.html#ga2664f36a2dd557741c429cb799f04641',1,'mimalloc-doc.h']]], + ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece',1,'mimalloc-doc.h']]], + ['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#gae5b17ff027cd2150b43a33040250cf3f',1,'mimalloc-doc.h']]], + ['mi_5freserve_5fhuge_5fos_5fpages_5fat',['mi_reserve_huge_os_pages_at',['../group__extended.html#ga7795a13d20087447281858d2c771cca1',1,'mimalloc-doc.h']]], + ['mi_5freserve_5fhuge_5fos_5fpages_5finterleave',['mi_reserve_huge_os_pages_interleave',['../group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50',1,'mimalloc-doc.h']]], ['mi_5frezalloc',['mi_rezalloc',['../group__zeroinit.html#ga8c292e142110229a2980b37ab036dbc6',1,'mimalloc-doc.h']]], ['mi_5frezalloc_5faligned',['mi_rezalloc_aligned',['../group__zeroinit.html#gacd71a7bce96aab38ae6de17af2eb2cf0',1,'mimalloc-doc.h']]], ['mi_5frezalloc_5faligned_5fat',['mi_rezalloc_aligned_at',['../group__zeroinit.html#gae8b358c417e61d5307da002702b0a8e1',1,'mimalloc-doc.h']]], ['mi_5fsmall_5fsize_5fmax',['MI_SMALL_SIZE_MAX',['../group__extended.html#ga1ea64283508718d9d645c38efc2f4305',1,'mimalloc-doc.h']]], ['mi_5fstats_5fmerge',['mi_stats_merge',['../group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1',1,'mimalloc-doc.h']]], - ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga8ca07ccff283956d71f48272f4fd5c01',1,'mimalloc-doc.h']]], + ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga2d126e5c62d3badc35445e5d84166df2',1,'mi_stats_print(void *out): mimalloc-doc.h'],['../group__extended.html#ga256cc6f13a142deabbadd954a217e228',1,'mi_stats_print(mi_output_fun *out, void *arg): mimalloc-doc.h']]], ['mi_5fstats_5freset',['mi_stats_reset',['../group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99',1,'mimalloc-doc.h']]], 
['mi_5fstrdup',['mi_strdup',['../group__malloc.html#gac7cffe13f1f458ed16789488bf92b9b2',1,'mimalloc-doc.h']]], ['mi_5fstrndup',['mi_strndup',['../group__malloc.html#gaaabf971c2571891433477e2d21a35266',1,'mimalloc-doc.h']]], ['mi_5fthread_5fdone',['mi_thread_done',['../group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf',1,'mimalloc-doc.h']]], ['mi_5fthread_5finit',['mi_thread_init',['../group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17',1,'mimalloc-doc.h']]], - ['mi_5fthread_5fstats_5fprint',['mi_thread_stats_print',['../group__extended.html#ga489670a15d1a257ab4639e645ee4612a',1,'mimalloc-doc.h']]], + ['mi_5fthread_5fstats_5fprint_5fout',['mi_thread_stats_print_out',['../group__extended.html#gab1dac8476c46cb9eecab767eb40c1525',1,'mimalloc-doc.h']]], ['mi_5fusable_5fsize',['mi_usable_size',['../group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee',1,'mimalloc-doc.h']]], ['mi_5fvalloc',['mi_valloc',['../group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b',1,'mimalloc-doc.h']]], ['mi_5fzalloc',['mi_zalloc',['../group__malloc.html#gafdd9d8bb2986e668ba9884f28af38000',1,'mimalloc-doc.h']]], diff --git a/docs/search/all_c.html b/docs/search/all_c.html new file mode 100644 index 00000000..3de15867 --- /dev/null +++ b/docs/search/all_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/all_c.js b/docs/search/all_c.js new file mode 100644 index 00000000..2b9b4cea --- /dev/null +++ b/docs/search/all_c.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['zero_20initialized_20re_2dallocation',['Zero initialized re-allocation',['../group__zeroinit.html',1,'']]] +]; diff --git a/docs/search/all_d.html b/docs/search/all_d.html new file mode 100644 index 00000000..a2d5bd7e --- /dev/null +++ b/docs/search/all_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/all_d.js b/docs/search/all_d.js new file mode 100644 index 00000000..2b9b4cea --- /dev/null +++ b/docs/search/all_d.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['zero_20initialized_20re_2dallocation',['Zero initialized re-allocation',['../group__zeroinit.html',1,'']]] +]; diff --git a/docs/search/enumvalues_1.js b/docs/search/enumvalues_1.js index 3ed91631..3b712708 100644 --- a/docs/search/enumvalues_1.js +++ b/docs/search/enumvalues_1.js @@ -1,6 +1,5 @@ var searchData= [ - ['mi_5foption_5fcache_5freset',['mi_option_cache_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac2157a0cb79cd996c1db7d9f6a090c07',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit',['mi_option_eager_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca1e8de72c93da7ff22d91e1e27b52ac2b',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit_5fdelay',['mi_option_eager_commit_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fregion_5fcommit',['mi_option_eager_region_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca32ce97ece29f69e82579679cf8a307ad',1,'mimalloc-doc.h']]], @@ -9,9 +8,11 @@ var searchData= ['mi_5foption_5fpage_5freset',['mi_option_page_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cada854dd272c66342f18a93ee254a2968',1,'mimalloc-doc.h']]], ['mi_5foption_5freserve_5fhuge_5fos_5fpages',['mi_option_reserve_huge_os_pages',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884caca7ed041be3b0b9d0b82432c7bf41af2',1,'mimalloc-doc.h']]], ['mi_5foption_5freset_5fdecommits',['mi_option_reset_decommits',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536',1,'mimalloc-doc.h']]], + ['mi_5foption_5freset_5fdelay',['mi_option_reset_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca154fe170131d5212cff57e22b99523c5',1,'mimalloc-doc.h']]], 
['mi_5foption_5fsegment_5fcache',['mi_option_segment_cache',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca2ecbe7ef32f5c84de3739aa4f0b805a1',1,'mimalloc-doc.h']]], ['mi_5foption_5fsegment_5freset',['mi_option_segment_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d',1,'mimalloc-doc.h']]], ['mi_5foption_5fshow_5ferrors',['mi_option_show_errors',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafbf4822e5c00732c5984b32a032837f0',1,'mimalloc-doc.h']]], ['mi_5foption_5fshow_5fstats',['mi_option_show_stats',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0957ef73b2550764b4840edf48422fda',1,'mimalloc-doc.h']]], + ['mi_5foption_5fuse_5fnuma_5fnodes',['mi_option_use_numa_nodes',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0ac33a18f6b659fcfaf44efb0bab1b74',1,'mimalloc-doc.h']]], ['mi_5foption_5fverbose',['mi_option_verbose',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca7c8b7bf5281c581bad64f5daa6442777',1,'mimalloc-doc.h']]] ]; diff --git a/docs/search/functions_0.js b/docs/search/functions_0.js index c5eeb540..d1d209a1 100644 --- a/docs/search/functions_0.js +++ b/docs/search/functions_0.js @@ -81,20 +81,21 @@ var searchData= ['mi_5frecalloc',['mi_recalloc',['../group__malloc.html#ga23a0fbb452b5dce8e31fab1a1958cacc',1,'mimalloc-doc.h']]], ['mi_5frecalloc_5faligned',['mi_recalloc_aligned',['../group__zeroinit.html#ga3e7e5c291acf1c7fd7ffd9914a9f945f',1,'mimalloc-doc.h']]], ['mi_5frecalloc_5faligned_5fat',['mi_recalloc_aligned_at',['../group__zeroinit.html#ga4ff5e92ad73585418a072c9d059e5cf9',1,'mimalloc-doc.h']]], - ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga24dc9cc6fca8daa2aa30aa8025467ce2',1,'mimalloc-doc.h']]], - ['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#ga84a0c8b401e42eb5b1bce156852f44c5',1,'mimalloc-doc.h']]], - 
['mi_5freserve_5fhuge_5fos_5fpages',['mi_reserve_huge_os_pages',['../group__extended.html#ga2664f36a2dd557741c429cb799f04641',1,'mimalloc-doc.h']]], + ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece',1,'mimalloc-doc.h']]], + ['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#gae5b17ff027cd2150b43a33040250cf3f',1,'mimalloc-doc.h']]], + ['mi_5freserve_5fhuge_5fos_5fpages_5fat',['mi_reserve_huge_os_pages_at',['../group__extended.html#ga7795a13d20087447281858d2c771cca1',1,'mimalloc-doc.h']]], + ['mi_5freserve_5fhuge_5fos_5fpages_5finterleave',['mi_reserve_huge_os_pages_interleave',['../group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50',1,'mimalloc-doc.h']]], ['mi_5frezalloc',['mi_rezalloc',['../group__zeroinit.html#ga8c292e142110229a2980b37ab036dbc6',1,'mimalloc-doc.h']]], ['mi_5frezalloc_5faligned',['mi_rezalloc_aligned',['../group__zeroinit.html#gacd71a7bce96aab38ae6de17af2eb2cf0',1,'mimalloc-doc.h']]], ['mi_5frezalloc_5faligned_5fat',['mi_rezalloc_aligned_at',['../group__zeroinit.html#gae8b358c417e61d5307da002702b0a8e1',1,'mimalloc-doc.h']]], ['mi_5fstats_5fmerge',['mi_stats_merge',['../group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1',1,'mimalloc-doc.h']]], - ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga8ca07ccff283956d71f48272f4fd5c01',1,'mimalloc-doc.h']]], + ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga2d126e5c62d3badc35445e5d84166df2',1,'mi_stats_print(void *out): mimalloc-doc.h'],['../group__extended.html#ga256cc6f13a142deabbadd954a217e228',1,'mi_stats_print(mi_output_fun *out, void *arg): mimalloc-doc.h']]], ['mi_5fstats_5freset',['mi_stats_reset',['../group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99',1,'mimalloc-doc.h']]], ['mi_5fstrdup',['mi_strdup',['../group__malloc.html#gac7cffe13f1f458ed16789488bf92b9b2',1,'mimalloc-doc.h']]], 
['mi_5fstrndup',['mi_strndup',['../group__malloc.html#gaaabf971c2571891433477e2d21a35266',1,'mimalloc-doc.h']]], ['mi_5fthread_5fdone',['mi_thread_done',['../group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf',1,'mimalloc-doc.h']]], ['mi_5fthread_5finit',['mi_thread_init',['../group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17',1,'mimalloc-doc.h']]], - ['mi_5fthread_5fstats_5fprint',['mi_thread_stats_print',['../group__extended.html#ga489670a15d1a257ab4639e645ee4612a',1,'mimalloc-doc.h']]], + ['mi_5fthread_5fstats_5fprint_5fout',['mi_thread_stats_print_out',['../group__extended.html#gab1dac8476c46cb9eecab767eb40c1525',1,'mimalloc-doc.h']]], ['mi_5fusable_5fsize',['mi_usable_size',['../group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee',1,'mimalloc-doc.h']]], ['mi_5fvalloc',['mi_valloc',['../group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b',1,'mimalloc-doc.h']]], ['mi_5fzalloc',['mi_zalloc',['../group__malloc.html#gafdd9d8bb2986e668ba9884f28af38000',1,'mimalloc-doc.h']]], diff --git a/docs/search/functions_1.html b/docs/search/functions_1.html new file mode 100644 index 00000000..bfcf880b --- /dev/null +++ b/docs/search/functions_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/functions_1.js b/docs/search/functions_1.js new file mode 100644 index 00000000..06dbb19b --- /dev/null +++ b/docs/search/functions_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['void',['void',['../group__extended.html#gadc49452cc1634aa03ac83ffe9b97a19c',1,'mimalloc-doc.h']]] +]; diff --git a/docs/search/groups_7.html b/docs/search/groups_7.html new file mode 100644 index 00000000..6a24e7cf --- /dev/null +++ b/docs/search/groups_7.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/groups_7.js b/docs/search/groups_7.js new file mode 100644 index 00000000..2b9b4cea --- /dev/null +++ b/docs/search/groups_7.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['zero_20initialized_20re_2dallocation',['Zero initialized re-allocation',['../group__zeroinit.html',1,'']]] +]; diff --git a/docs/search/pages_4.html b/docs/search/pages_4.html new file mode 100644 index 00000000..021d277a --- /dev/null +++ b/docs/search/pages_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/pages_4.js b/docs/search/pages_4.js new file mode 100644 index 00000000..b47682a4 --- /dev/null +++ b/docs/search/pages_4.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['using_20the_20library',['Using the library',['../using.html',1,'']]] +]; diff --git a/docs/search/typedefs_0.js b/docs/search/typedefs_0.js index c6f0f7ec..17816828 100644 --- a/docs/search/typedefs_0.js +++ b/docs/search/typedefs_0.js @@ -1,7 +1,7 @@ var searchData= [ ['mi_5fblock_5fvisit_5ffun',['mi_block_visit_fun',['../group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65',1,'mimalloc-doc.h']]], - ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga22213691c3ce5ab4d91b24aff1023529',1,'mimalloc-doc.h']]], + ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga299dae78d25ce112e384a98b7309c5be',1,'mimalloc-doc.h']]], ['mi_5fheap_5ft',['mi_heap_t',['../group__heap.html#ga34a47cde5a5b38c29f1aa3c5e76943c2',1,'mimalloc-doc.h']]], - ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#ga2bed6d40b74591a67f81daea4b4a246f',1,'mimalloc-doc.h']]] + ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#gad823d23444a4b77a40f66bf075a98a0c',1,'mimalloc-doc.h']]] ]; diff --git a/docs/search/typedefs_1.html b/docs/search/typedefs_1.html new file mode 100644 index 00000000..c8a02685 --- /dev/null +++ b/docs/search/typedefs_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/typedefs_1.js b/docs/search/typedefs_1.js new file mode 100644 index 00000000..ecccb16a --- /dev/null +++ b/docs/search/typedefs_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['heartbeat',['heartbeat',['../group__extended.html#ga411f6e94394a2400aa460c796beff8d8',1,'mimalloc-doc.h']]] +]; diff --git a/docs/search/typedefs_2.html b/docs/search/typedefs_2.html new file mode 100644 index 00000000..86a91955 --- /dev/null +++ b/docs/search/typedefs_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/typedefs_2.js b/docs/search/typedefs_2.js new file mode 100644 index 00000000..2af06079 --- /dev/null +++ b/docs/search/typedefs_2.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['mi_5fblock_5fvisit_5ffun',['mi_block_visit_fun',['../group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65',1,'mimalloc-doc.h']]], + ['mi_5fheap_5ft',['mi_heap_t',['../group__heap.html#ga34a47cde5a5b38c29f1aa3c5e76943c2',1,'mimalloc-doc.h']]] +]; diff --git a/docs/using.html b/docs/using.html index 9b7305b0..eae37a5e 100644 --- a/docs/using.html +++ b/docs/using.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -102,7 +102,11 @@ $(document).ready(function(){initNavTree('using.html','');});
Using the library
-

The preferred usage is including <mimalloc.h>, linking with the shared- or static library, and using the mi_malloc API exclusively for allocation. For example,

gcc -o myprogram -lmimalloc myfile.c

mimalloc uses only safe OS calls (mmap and VirtualAlloc) and can co-exist with other allocators linked to the same program. If you use cmake, you can simply use:

find_package(mimalloc 1.0 REQUIRED)

in your CMakeLists.txt to find a locally installed mimalloc. Then use either:

target_link_libraries(myapp PUBLIC mimalloc)

to link with the shared (dynamic) library, or:

target_link_libraries(myapp PUBLIC mimalloc-static)

to link with the static library. See test\CMakeLists.txt for an example.

+

Build

+

The preferred usage is including <mimalloc.h>, linking with the shared- or static library, and using the mi_malloc API exclusively for allocation. For example,

gcc -o myprogram -lmimalloc myfile.c

mimalloc uses only safe OS calls (mmap and VirtualAlloc) and can co-exist with other allocators linked to the same program. If you use cmake, you can simply use:

find_package(mimalloc 1.0 REQUIRED)

in your CMakeLists.txt to find a locally installed mimalloc. Then use either:

target_link_libraries(myapp PUBLIC mimalloc)

to link with the shared (dynamic) library, or:

target_link_libraries(myapp PUBLIC mimalloc-static)

to link with the static library. See test\CMakeLists.txt for an example.

+

C++

+

For best performance in C++ programs, it is also recommended to override the global new and delete operators. For convenience, mimalloc provides mimalloc-new-delete.h which does this for you – just include it in a single(!) source file in your project.

+

In C++, mimalloc also provides the mi_stl_allocator struct which implements the std::allocator interface. For example:

std::vector<some_struct, mi_stl_allocator<some_struct>> vec;
vec.push_back(some_struct());

Statistics

You can pass environment variables to print verbose messages (MIMALLOC_VERBOSE=1) and statistics (MIMALLOC_SHOW_STATS=1) (in the debug version):

> env MIMALLOC_SHOW_STATS=1 ./cfrac 175451865205073170563711388363
175451865205073170563711388363 = 374456281610909315237213 * 468551
heap stats: peak total freed unit
normal 2: 16.4 kb 17.5 mb 17.5 mb 16 b ok
normal 3: 16.3 kb 15.2 mb 15.2 mb 24 b ok
normal 4: 64 b 4.6 kb 4.6 kb 32 b ok
normal 5: 80 b 118.4 kb 118.4 kb 40 b ok
normal 6: 48 b 48 b 48 b 48 b ok
normal 17: 960 b 960 b 960 b 320 b ok
heap stats: peak total freed unit
normal: 33.9 kb 32.8 mb 32.8 mb 1 b ok
huge: 0 b 0 b 0 b 1 b ok
total: 33.9 kb 32.8 mb 32.8 mb 1 b ok
malloc requested: 32.8 mb
committed: 58.2 kb 58.2 kb 58.2 kb 1 b ok
reserved: 2.0 mb 2.0 mb 2.0 mb 1 b ok
reset: 0 b 0 b 0 b 1 b ok
segments: 1 1 1
-abandoned: 0
pages: 6 6 6
-abandoned: 0
mmaps: 3
mmap fast: 0
mmap slow: 1
threads: 0
elapsed: 2.022s
process: user: 1.781s, system: 0.016s, faults: 756, reclaims: 0, rss: 2.7 mb

The above model of using the mi_ prefixed API is not always possible though in existing programs that already use the standard malloc interface, and another option is to override the standard malloc interface completely and redirect all calls to the mimalloc library instead.

See Overriding Malloc for more info.

From 9453d8b4683ccc347458666bf3a10de7fa8c2638 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 17 Jan 2020 15:39:06 -0800 Subject: [PATCH 187/293] update documentation --- doc/doxyfile | 4 ++-- doc/mimalloc-doc.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/doxyfile b/doc/doxyfile index 11d71667..91adbeb8 100644 --- a/doc/doxyfile +++ b/doc/doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = mi-malloc # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 1.0 +PROJECT_NUMBER = 1.4 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a @@ -1235,7 +1235,7 @@ HTML_EXTRA_STYLESHEET = mimalloc-doxygen.css # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_EXTRA_FILES = +HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index 71cc1589..ea526b12 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -784,6 +784,9 @@ void mi_free_aligned(void* p, size_t alignment); /// raise `std::bad_alloc` exception on failure. void* mi_new(std::size_t n) noexcept(false); +/// raise `std::bad_alloc` exception on failure or overflow. +void* mi_new_n(size_t count, size_t size) noexcept(false); + /// raise `std::bad_alloc` exception on failure. 
void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false); From f4ee1760b8f58bdb4967e47f34c2495387c2cff2 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 17 Jan 2020 15:39:41 -0800 Subject: [PATCH 188/293] Suppress C source compiled as C++ warning on clang --- CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a894de9b..8f05c883 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,7 +96,10 @@ endif() if(MI_USE_CXX MATCHES "ON") message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)") set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX ) - set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) + set_source_files_properties(src/static.c test/test-api.c test/test-stress.c PROPERTIES LANGUAGE CXX ) + if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang|Clang") + list(APPEND mi_cflags -Wno-deprecated) + endif() endif() # Compiler flags From 6dd636d82db6516f325c1c3b2695e20a024230ce Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 17 Jan 2020 15:41:52 -0800 Subject: [PATCH 189/293] improve STL allocator using mi_new_n and removing unused parameter names; follow up from pr #193 and #188 --- include/mimalloc.h | 31 +++++++++++++++++------------- src/alloc.c | 47 ++++++++++++++++++++++++++++++---------------- 2 files changed, 49 insertions(+), 29 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 59f394a7..485978e6 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -326,10 +326,11 @@ mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept; mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept; mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; -mi_decl_export void* mi_new(size_t n) mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_new_aligned(size_t n, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); 
-mi_decl_export void* mi_new_nothrow(size_t n) mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_new_aligned_nothrow(size_t n, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_export void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new_nothrow(size_t size) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); #ifdef __cplusplus } @@ -347,21 +348,25 @@ mi_decl_export void* mi_new_aligned_nothrow(size_t n, size_t alignment) mi_attr_ template struct mi_stl_allocator { typedef T value_type; -#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 + #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 using propagate_on_container_copy_assignment = std::true_type; using propagate_on_container_move_assignment = std::true_type; using propagate_on_container_swap = std::true_type; using is_always_equal = std::true_type; -#endif - mi_stl_allocator() mi_attr_noexcept {} - mi_stl_allocator(const mi_stl_allocator& other) mi_attr_noexcept { (void)other; } - template mi_stl_allocator(const mi_stl_allocator& other) mi_attr_noexcept { (void)other; } - T* allocate(size_t n, const void* hint = 0) { (void)hint; return (T*)mi_mallocn(n, sizeof(T)); } - void deallocate(T* p, size_t n) { mi_free_size(p,n); } + #endif + mi_stl_allocator() mi_attr_noexcept { } + mi_stl_allocator(const mi_stl_allocator& ) mi_attr_noexcept { } + template mi_stl_allocator(const mi_stl_allocator& ) mi_attr_noexcept { } + void deallocate(T* p, size_t size) { mi_free_size(p, size); } + #if (__cplusplus >= 201703L) // C++17 + T* allocate(size_t count) { return (T*)mi_new_n(count, sizeof(T)); } + #else + T* allocate(size_t count, const void* 
hint = 0) { (void)hint; return (T*)mi_new_n(count, sizeof(T)); } + #endif }; -template bool operator==(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) mi_attr_noexcept { (void)lhs; (void)rhs; return true; } -template bool operator!=(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) mi_attr_noexcept { (void)lhs; (void)rhs; return false; } +template bool operator==(const mi_stl_allocator& , const mi_stl_allocator& ) mi_attr_noexcept { return true; } +template bool operator!=(const mi_stl_allocator& , const mi_stl_allocator& ) mi_attr_noexcept { return false; } #endif // __cplusplus #endif diff --git a/src/alloc.c b/src/alloc.c index d66c629b..37d43d9f 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -678,36 +678,51 @@ static bool mi_try_new_handler(bool nothrow) { } #endif -static mi_decl_noinline void* mi_try_new(size_t n, bool nothrow ) { +static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow ) { void* p = NULL; while(p == NULL && mi_try_new_handler(nothrow)) { - p = mi_malloc(n); + p = mi_malloc(size); } return p; } -void* mi_new(size_t n) { - void* p = mi_malloc(n); - if (mi_unlikely(p == NULL)) return mi_try_new(n,false); +void* mi_new(size_t size) { + void* p = mi_malloc(size); + if (mi_unlikely(p == NULL)) return mi_try_new(size,false); return p; } -void* mi_new_aligned(size_t n, size_t alignment) { +void* mi_new_nothrow(size_t size) { + void* p = mi_malloc(size); + if (mi_unlikely(p == NULL)) return mi_try_new(size, true); + return p; +} + +void* mi_new_aligned(size_t size, size_t alignment) { void* p; - do { p = mi_malloc_aligned(n, alignment); } + do { + p = mi_malloc_aligned(size, alignment); + } while(p == NULL && mi_try_new_handler(false)); return p; } -void* mi_new_nothrow(size_t n) { - void* p = mi_malloc(n); - if (mi_unlikely(p == NULL)) return mi_try_new(n,true); +void* mi_new_aligned_nothrow(size_t size, size_t alignment) { + void* p; + do { + p = mi_malloc_aligned(size, alignment); + } + while(p == NULL && 
mi_try_new_handler(true)); return p; } -void* mi_new_aligned_nothrow(size_t n, size_t alignment) { - void* p; - do { p = mi_malloc_aligned(n, alignment); } - while (p == NULL && mi_try_new_handler(true)); - return p; -} +void* mi_new_n(size_t count, size_t size) { + size_t total; + if (mi_unlikely(mi_mul_overflow(count, size, &total))) { + mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc + return NULL; + } + else { + return mi_new(total); + } +} \ No newline at end of file From dbe721de393cbfce7a699cc6f1b5cf5955a85f7a Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 17 Jan 2020 15:45:12 -0800 Subject: [PATCH 190/293] dont compile test-stress.c as C++ code (or we get atomic compilation errors) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f05c883..27729584 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,7 +96,7 @@ endif() if(MI_USE_CXX MATCHES "ON") message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)") set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX ) - set_source_files_properties(src/static.c test/test-api.c test/test-stress.c PROPERTIES LANGUAGE CXX ) + set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang|Clang") list(APPEND mi_cflags -Wno-deprecated) endif() From 3e982a3813f191b32fee60b06fba758a98f3a633 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 17 Jan 2020 15:58:43 -0800 Subject: [PATCH 191/293] fix STL deallocate passing count (instead of size) to mi_free_size --- include/mimalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 485978e6..67ff1a35 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -357,7 +357,7 @@ template struct mi_stl_allocator { mi_stl_allocator() mi_attr_noexcept { } mi_stl_allocator(const mi_stl_allocator& ) 
mi_attr_noexcept { } template mi_stl_allocator(const mi_stl_allocator& ) mi_attr_noexcept { } - void deallocate(T* p, size_t size) { mi_free_size(p, size); } + void deallocate(T* p, size_t /* count */) { mi_free(p); } #if (__cplusplus >= 201703L) // C++17 T* allocate(size_t count) { return (T*)mi_new_n(count, sizeof(T)); } #else From dc5838896837cc4bd6bc1583f390fa51e389ae48 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 17 Jan 2020 19:59:55 -0800 Subject: [PATCH 192/293] Add ability to register custom error function called on various error conditions; including ENOMEM --- doc/mimalloc-doc.h | 24 +++++++++ docs/group__extended.html | 74 ++++++++++++++++++++++++++ docs/group__extended.js | 2 + docs/group__posix.html | 41 +++++++++++++++ docs/group__posix.js | 1 + docs/mimalloc-doc_8h_source.html | 55 ++++++++++---------- docs/navtreeindex0.js | 51 +++++++++--------- docs/search/all_6.js | 3 ++ docs/search/functions_0.js | 2 + docs/search/typedefs_0.js | 1 + include/mimalloc-internal.h | 89 +++++++++++++++++++++----------- include/mimalloc.h | 15 ++++-- src/alloc-aligned.c | 6 +-- src/alloc.c | 27 ++++------ src/arena.c | 6 +-- src/init.c | 2 +- src/options.c | 44 ++++++++++++---- src/os.c | 10 ++-- src/page.c | 10 ++-- test/test-api.c | 6 ++- 20 files changed, 342 insertions(+), 127 deletions(-) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index ea526b12..ca744e4c 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -373,6 +373,30 @@ typedef void (mi_output_fun)(const char* msg, void* arg); /// like verbose or warning messages. void mi_register_output(mi_output_fun* out, void* arg); +/// Type of error callback functions. +/// @param err Error code (see mi_register_error() for a complete list). +/// @param arg Argument that was passed at registration to hold extra state. +/// +/// @see mi_register_error() +typedef void (mi_error_fun)(int err, void* arg); + +/// Register an error callback function. 
+/// @param errfun The error function that is called on an error (use \a NULL for default) +/// @param arg Extra argument that will be passed on to the error function. +/// +/// The \a errfun function is called on an error in mimalloc after emitting +/// an error message (through the output function). It as always legal to just +/// return from the \a errfun function in which case allocation functions generally +/// return \a NULL or ignore the condition. The default function only calls abort() +/// when compiled in secure mode with an \a EFAULT error. The possible error +/// codes are: +/// * \a EAGAIN: Double free was detected (only in debug and secure mode). +/// * \a EFAULT: Corrupted free list or meta-data was detected (only in debug and secure mode). +/// * \a ENOMEM: Not enough memory available to satisfy the request. +/// * \a EOVERFLOW: Too large a request, for example in mi_calloc(), the \a count and \a size parameters are too large. +/// * \a EINVAL: Trying to free or re-allocate an invalid pointer. +void mi_register_error(mi_error_fun* errfun, void* arg); + /// Is a pointer part of our heap? /// @param p The pointer to check. /// @returns \a true if this is a pointer into our heap. diff --git a/docs/group__extended.html b/docs/group__extended.html index 85ea3624..9e2a2efc 100644 --- a/docs/group__extended.html +++ b/docs/group__extended.html @@ -124,6 +124,9 @@ Typedefs typedef void() mi_output_fun(const char *msg, void *arg)  Type of output functions. More...
  +typedef void() mi_error_fun(int err, void *arg) + Type of error callback functions. More...
+  @@ -169,6 +172,9 @@ Functions + + + @@ -225,6 +231,30 @@ Functions
See also
mi_register_deferred_free
+ + + +

◆ mi_error_fun

+ +
+
+

Functions

void mi_register_output (mi_output_fun *out, void *arg)
 Register an output function. More...
 
void mi_register_error (mi_error_fun *errfun, void *arg)
 Register an error callback function. More...
 
bool mi_is_in_heap_region (const void *p)
 Is a pointer part of our heap? More...
 
+ + + +
typedef void() mi_error_fun(int err, void *arg)
+
+ +

Type of error callback functions.

+
Parameters
+ + + +
errError code (see mi_register_error() for a complete list).
argArgument that was passed at registration to hold extra state.
+
+
+
See also
mi_register_error()
+
@@ -419,6 +449,50 @@ Functions

Some runtime systems use deferred free-ing, for example when using reference counting to limit the worst case free time. Such systems can register (re-entrant) deferred free function to free more memory on demand. When the force parameter is true all possible memory should be freed. The per-thread heartbeat parameter is monotonically increasing and guaranteed to be deterministic if the program allocates deterministically. The deferred_free function is guaranteed to be called deterministically after some number of allocations (regardless of freeing or available free memory). At most one deferred_free function can be active.

+
+
+ +

◆ mi_register_error()

+ +
+
+ + + + + + + + + + + + + + + + + + +
void mi_register_error (mi_error_funerrfun,
void * arg 
)
+
+ +

Register an error callback function.

+
Parameters
+ + + +
errfunThe error function that is called on an error (use NULL for default)
argExtra argument that will be passed on to the error function.
+
+
+

The errfun function is called on an error in mimalloc after emitting an error message (through the output function). It is always legal to just return from the errfun function in which case allocation functions generally return NULL or ignore the condition. The default function only calls abort() when compiled in secure mode with an EFAULT error. The possible error codes are:

    +
  • EAGAIN: Double free was detected (only in debug and secure mode).
  • +
  • EFAULT: Corrupted free list or meta-data was detected (only in debug and secure mode).
  • +
  • ENOMEM: Not enough memory available to satisfy the request.
  • +
  • EOVERFLOW: Too large a request, for example in mi_calloc(), the count and size parameters are too large.
  • +
  • EINVAL: Trying to free or re-allocate an invalid pointer.
  • +
+
diff --git a/docs/group__extended.js b/docs/group__extended.js index 7152b518..ff8891b2 100644 --- a/docs/group__extended.js +++ b/docs/group__extended.js @@ -2,6 +2,7 @@ var group__extended = [ [ "MI_SMALL_SIZE_MAX", "group__extended.html#ga1ea64283508718d9d645c38efc2f4305", null ], [ "mi_deferred_free_fun", "group__extended.html#ga299dae78d25ce112e384a98b7309c5be", null ], + [ "mi_error_fun", "group__extended.html#ga251d369cda3f1c2a955c555486ed90e5", null ], [ "mi_output_fun", "group__extended.html#gad823d23444a4b77a40f66bf075a98a0c", null ], [ "mi_collect", "group__extended.html#ga421430e2226d7d468529cec457396756", null ], [ "mi_good_size", "group__extended.html#gac057927cd06c854b45fe7847e921bd47", null ], @@ -9,6 +10,7 @@ var group__extended = [ "mi_is_redirected", "group__extended.html#gaad25050b19f30cd79397b227e0157a3f", null ], [ "mi_malloc_small", "group__extended.html#ga7136c2e55cb22c98ecf95d08d6debb99", null ], [ "mi_register_deferred_free", "group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece", null ], + [ "mi_register_error", "group__extended.html#gaa1d55e0e894be240827e5d87ec3a1f45", null ], [ "mi_register_output", "group__extended.html#gae5b17ff027cd2150b43a33040250cf3f", null ], [ "mi_reserve_huge_os_pages_at", "group__extended.html#ga7795a13d20087447281858d2c771cca1", null ], [ "mi_reserve_huge_os_pages_interleave", "group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50", null ], diff --git a/docs/group__posix.html b/docs/group__posix.html index 65e8ff7e..eaa4a10f 100644 --- a/docs/group__posix.html +++ b/docs/group__posix.html @@ -140,6 +140,9 @@ Functions void * mi_new (std::size_t n) noexcept(false)  raise std::bad_alloc exception on failure. More...
  +void * mi_new_n (size_t count, size_t size) noexcept(false) + raise std::bad_alloc exception on failure or overflow. More...
+  void * mi_new_aligned (std::size_t n, std::align_val_t alignment) noexcept(false)  raise std::bad_alloc exception on failure. More...
  @@ -484,6 +487,44 @@ Functions

return NULL on failure.

+
+
+ +

◆ mi_new_n()

+ +
+
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
void* mi_new_n (size_t count,
size_t size 
)
+
+noexcept
+
+ +

raise std::bad_alloc exception on failure or overflow.

+
diff --git a/docs/group__posix.js b/docs/group__posix.js index 5584092b..0f2b895d 100644 --- a/docs/group__posix.js +++ b/docs/group__posix.js @@ -12,6 +12,7 @@ var group__posix = [ "mi_new", "group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545", null ], [ "mi_new_aligned", "group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3", null ], [ "mi_new_aligned_nothrow", "group__posix.html#gab5e29558926d934c3f1cae8c815f942c", null ], + [ "mi_new_n", "group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81", null ], [ "mi_new_nothrow", "group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a", null ], [ "mi_posix_memalign", "group__posix.html#gacff84f226ba9feb2031b8992e5579447", null ], [ "mi_pvalloc", "group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e", null ], diff --git a/docs/mimalloc-doc_8h_source.html b/docs/mimalloc-doc_8h_source.html index c240f151..12d0f799 100644 --- a/docs/mimalloc-doc_8h_source.html +++ b/docs/mimalloc-doc_8h_source.html @@ -102,7 +102,7 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
mimalloc-doc.h
-
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 #error "documentation file only!"
9 
10 
81 
85 
89 void mi_free(void* p);
90 
95 void* mi_malloc(size_t size);
96 
101 void* mi_zalloc(size_t size);
102 
112 void* mi_calloc(size_t count, size_t size);
113 
126 void* mi_realloc(void* p, size_t newsize);
127 
138 void* mi_recalloc(void* p, size_t count, size_t size);
139 
153 void* mi_expand(void* p, size_t newsize);
154 
164 void* mi_mallocn(size_t count, size_t size);
165 
175 void* mi_reallocn(void* p, size_t count, size_t size);
176 
193 void* mi_reallocf(void* p, size_t newsize);
194 
195 
204 char* mi_strdup(const char* s);
205 
215 char* mi_strndup(const char* s, size_t n);
216 
229 char* mi_realpath(const char* fname, char* resolved_name);
230 
232 
233 // ------------------------------------------------------
234 // Extended functionality
235 // ------------------------------------------------------
236 
240 
243 #define MI_SMALL_SIZE_MAX (128*sizeof(void*))
244 
252 void* mi_malloc_small(size_t size);
253 
261 void* mi_zalloc_small(size_t size);
262 
277 size_t mi_usable_size(void* p);
278 
288 size_t mi_good_size(size_t size);
289 
297 void mi_collect(bool force);
298 
303 void mi_stats_print(void* out);
304 
310 void mi_stats_print(mi_output_fun* out, void* arg);
311 
313 void mi_stats_reset(void);
314 
316 void mi_stats_merge(void);
317 
321 void mi_thread_init(void);
322 
327 void mi_thread_done(void);
328 
334 void mi_thread_stats_print_out(mi_output_fun* out, void* arg);
335 
342 typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
343 
359 void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg);
360 
366 typedef void (mi_output_fun)(const char* msg, void* arg);
367 
374 void mi_register_output(mi_output_fun* out, void* arg);
375 
380 bool mi_is_in_heap_region(const void* p);
381 
382 
395 int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs);
396 
409 int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs);
410 
411 
416 bool mi_is_redirected();
417 
418 
420 
421 // ------------------------------------------------------
422 // Aligned allocation
423 // ------------------------------------------------------
424 
430 
443 void* mi_malloc_aligned(size_t size, size_t alignment);
444 void* mi_zalloc_aligned(size_t size, size_t alignment);
445 void* mi_calloc_aligned(size_t count, size_t size, size_t alignment);
446 void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment);
447 
458 void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset);
459 void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset);
460 void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset);
461 void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
462 
464 
470 
475 struct mi_heap_s;
476 
481 typedef struct mi_heap_s mi_heap_t;
482 
485 
493 void mi_heap_delete(mi_heap_t* heap);
494 
502 void mi_heap_destroy(mi_heap_t* heap);
503 
508 
512 
519 
521 void mi_heap_collect(mi_heap_t* heap, bool force);
522 
525 void* mi_heap_malloc(mi_heap_t* heap, size_t size);
526 
530 void* mi_heap_malloc_small(mi_heap_t* heap, size_t size);
531 
534 void* mi_heap_zalloc(mi_heap_t* heap, size_t size);
535 
538 void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size);
539 
542 void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size);
543 
546 char* mi_heap_strdup(mi_heap_t* heap, const char* s);
547 
550 char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n);
551 
554 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name);
555 
556 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize);
557 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size);
558 void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize);
559 
560 void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
561 void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
562 void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
563 void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
564 void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment);
565 void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset);
566 void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
567 void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
568 
570 
571 
580 
581 void* mi_rezalloc(void* p, size_t newsize);
582 void* mi_recalloc(void* p, size_t newcount, size_t size) ;
583 
584 void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment);
585 void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
586 void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment);
587 void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
588 
589 void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize);
590 void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size);
591 
592 void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
593 void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
594 void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment);
595 void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
596 
598 
604 
616 #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
617 
619 #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
620 
622 #define mi_calloc_tp(tp,count) ((tp*)mi_calloc(count,sizeof(tp)))
623 
625 #define mi_mallocn_tp(tp,count) ((tp*)mi_mallocn(count,sizeof(tp)))
626 
628 #define mi_reallocn_tp(p,tp,count) ((tp*)mi_reallocn(p,count,sizeof(tp)))
629 
631 #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
632 
634 #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
635 
637 #define mi_heap_calloc_tp(hp,tp,count) ((tp*)mi_heap_calloc(hp,count,sizeof(tp)))
638 
640 #define mi_heap_mallocn_tp(hp,tp,count) ((tp*)mi_heap_mallocn(hp,count,sizeof(tp)))
641 
643 #define mi_heap_reallocn_tp(hp,p,tp,count) ((tp*)mi_heap_reallocn(p,count,sizeof(tp)))
644 
646 #define mi_heap_recalloc_tp(hp,p,tp,count) ((tp*)mi_heap_recalloc(p,count,sizeof(tp)))
647 
649 
655 
662 bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
663 
672 bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
673 
681 bool mi_check_owned(const void* p);
682 
685 typedef struct mi_heap_area_s {
686  void* blocks;
687  size_t reserved;
688  size_t committed;
689  size_t used;
690  size_t block_size;
692 
700 typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
701 
713 bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
714 
716 
722 
724 typedef enum mi_option_e {
725  // stable options
729  // the following options are experimental
743 } mi_option_t;
744 
745 
746 bool mi_option_enabled(mi_option_t option);
747 void mi_option_enable(mi_option_t option, bool enable);
748 void mi_option_enable_default(mi_option_t option, bool enable);
749 
750 long mi_option_get(mi_option_t option);
751 void mi_option_set(mi_option_t option, long value);
752 void mi_option_set_default(mi_option_t option, long value);
753 
754 
756 
763 
764 void* mi_recalloc(void* p, size_t count, size_t size);
765 size_t mi_malloc_size(const void* p);
766 size_t mi_malloc_usable_size(const void *p);
767 
769 void mi_cfree(void* p);
770 
771 int mi_posix_memalign(void** p, size_t alignment, size_t size);
772 int mi__posix_memalign(void** p, size_t alignment, size_t size);
773 void* mi_memalign(size_t alignment, size_t size);
774 void* mi_valloc(size_t size);
775 
776 void* mi_pvalloc(size_t size);
777 void* mi_aligned_alloc(size_t alignment, size_t size);
778 void* mi_reallocarray(void* p, size_t count, size_t size);
779 
780 void mi_free_size(void* p, size_t size);
781 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
782 void mi_free_aligned(void* p, size_t alignment);
783 
785 void* mi_new(std::size_t n) noexcept(false);
786 
788 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
789 
791 void* mi_new_nothrow(size_t n);
792 ``
794 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
795 
797 
void mi_option_enable_default(mi_option_t option, bool enable)
+
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 #error "documentation file only!"
9 
10 
81 
85 
89 void mi_free(void* p);
90 
95 void* mi_malloc(size_t size);
96 
101 void* mi_zalloc(size_t size);
102 
112 void* mi_calloc(size_t count, size_t size);
113 
126 void* mi_realloc(void* p, size_t newsize);
127 
138 void* mi_recalloc(void* p, size_t count, size_t size);
139 
153 void* mi_expand(void* p, size_t newsize);
154 
164 void* mi_mallocn(size_t count, size_t size);
165 
175 void* mi_reallocn(void* p, size_t count, size_t size);
176 
193 void* mi_reallocf(void* p, size_t newsize);
194 
195 
204 char* mi_strdup(const char* s);
205 
215 char* mi_strndup(const char* s, size_t n);
216 
229 char* mi_realpath(const char* fname, char* resolved_name);
230 
232 
233 // ------------------------------------------------------
234 // Extended functionality
235 // ------------------------------------------------------
236 
240 
243 #define MI_SMALL_SIZE_MAX (128*sizeof(void*))
244 
252 void* mi_malloc_small(size_t size);
253 
261 void* mi_zalloc_small(size_t size);
262 
277 size_t mi_usable_size(void* p);
278 
288 size_t mi_good_size(size_t size);
289 
297 void mi_collect(bool force);
298 
303 void mi_stats_print(void* out);
304 
310 void mi_stats_print(mi_output_fun* out, void* arg);
311 
313 void mi_stats_reset(void);
314 
316 void mi_stats_merge(void);
317 
321 void mi_thread_init(void);
322 
327 void mi_thread_done(void);
328 
334 void mi_thread_stats_print_out(mi_output_fun* out, void* arg);
335 
342 typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
343 
359 void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg);
360 
366 typedef void (mi_output_fun)(const char* msg, void* arg);
367 
374 void mi_register_output(mi_output_fun* out, void* arg);
375 
381 typedef void (mi_error_fun)(int err, void* arg);
382 
398 void mi_register_error(mi_error_fun* errfun, void* arg);
399 
404 bool mi_is_in_heap_region(const void* p);
405 
406 
419 int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs);
420 
433 int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs);
434 
435 
440 bool mi_is_redirected();
441 
442 
444 
445 // ------------------------------------------------------
446 // Aligned allocation
447 // ------------------------------------------------------
448 
454 
467 void* mi_malloc_aligned(size_t size, size_t alignment);
468 void* mi_zalloc_aligned(size_t size, size_t alignment);
469 void* mi_calloc_aligned(size_t count, size_t size, size_t alignment);
470 void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment);
471 
482 void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset);
483 void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset);
484 void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset);
485 void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
486 
488 
494 
499 struct mi_heap_s;
500 
505 typedef struct mi_heap_s mi_heap_t;
506 
509 
517 void mi_heap_delete(mi_heap_t* heap);
518 
526 void mi_heap_destroy(mi_heap_t* heap);
527 
532 
536 
543 
545 void mi_heap_collect(mi_heap_t* heap, bool force);
546 
549 void* mi_heap_malloc(mi_heap_t* heap, size_t size);
550 
554 void* mi_heap_malloc_small(mi_heap_t* heap, size_t size);
555 
558 void* mi_heap_zalloc(mi_heap_t* heap, size_t size);
559 
562 void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size);
563 
566 void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size);
567 
570 char* mi_heap_strdup(mi_heap_t* heap, const char* s);
571 
574 char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n);
575 
578 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name);
579 
580 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize);
581 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size);
582 void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize);
583 
584 void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
585 void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
586 void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
587 void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
588 void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment);
589 void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset);
590 void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
591 void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
592 
594 
595 
604 
605 void* mi_rezalloc(void* p, size_t newsize);
606 void* mi_recalloc(void* p, size_t newcount, size_t size) ;
607 
608 void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment);
609 void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
610 void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment);
611 void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
612 
613 void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize);
614 void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size);
615 
616 void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
617 void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
618 void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment);
619 void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
620 
622 
628 
640 #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
641 
643 #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
644 
646 #define mi_calloc_tp(tp,count) ((tp*)mi_calloc(count,sizeof(tp)))
647 
649 #define mi_mallocn_tp(tp,count) ((tp*)mi_mallocn(count,sizeof(tp)))
650 
652 #define mi_reallocn_tp(p,tp,count) ((tp*)mi_reallocn(p,count,sizeof(tp)))
653 
655 #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
656 
658 #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
659 
661 #define mi_heap_calloc_tp(hp,tp,count) ((tp*)mi_heap_calloc(hp,count,sizeof(tp)))
662 
664 #define mi_heap_mallocn_tp(hp,tp,count) ((tp*)mi_heap_mallocn(hp,count,sizeof(tp)))
665 
667 #define mi_heap_reallocn_tp(hp,p,tp,count) ((tp*)mi_heap_reallocn(p,count,sizeof(tp)))
668 
670 #define mi_heap_recalloc_tp(hp,p,tp,count) ((tp*)mi_heap_recalloc(p,count,sizeof(tp)))
671 
673 
679 
686 bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
687 
696 bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
697 
705 bool mi_check_owned(const void* p);
706 
709 typedef struct mi_heap_area_s {
710  void* blocks;
711  size_t reserved;
712  size_t committed;
713  size_t used;
714  size_t block_size;
716 
724 typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
725 
737 bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
738 
740 
746 
748 typedef enum mi_option_e {
749  // stable options
753  // the following options are experimental
767 } mi_option_t;
768 
769 
770 bool mi_option_enabled(mi_option_t option);
771 void mi_option_enable(mi_option_t option, bool enable);
772 void mi_option_enable_default(mi_option_t option, bool enable);
773 
774 long mi_option_get(mi_option_t option);
775 void mi_option_set(mi_option_t option, long value);
776 void mi_option_set_default(mi_option_t option, long value);
777 
778 
780 
787 
788 void* mi_recalloc(void* p, size_t count, size_t size);
789 size_t mi_malloc_size(const void* p);
790 size_t mi_malloc_usable_size(const void *p);
791 
793 void mi_cfree(void* p);
794 
795 int mi_posix_memalign(void** p, size_t alignment, size_t size);
796 int mi__posix_memalign(void** p, size_t alignment, size_t size);
797 void* mi_memalign(size_t alignment, size_t size);
798 void* mi_valloc(size_t size);
799 
800 void* mi_pvalloc(size_t size);
801 void* mi_aligned_alloc(size_t alignment, size_t size);
802 void* mi_reallocarray(void* p, size_t count, size_t size);
803 
804 void mi_free_size(void* p, size_t size);
805 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
806 void mi_free_aligned(void* p, size_t alignment);
807 
809 void* mi_new(std::size_t n) noexcept(false);
810 
812 void* mi_new_n(size_t count, size_t size) noexcept(false);
813 
815 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
816 
818 void* mi_new_nothrow(size_t n);
819 ``
821 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
822 
824 
void mi_option_enable_default(mi_option_t option, bool enable)
size_t mi_usable_size(void *p)
Return the available bytes in a memory block.
void * mi_reallocn(void *p, size_t count, size_t size)
Re-allocate memory to count elements of size bytes.
void * mi_malloc_aligned(size_t size, size_t alignment)
Allocate size bytes aligned by alignment.
@@ -116,16 +116,18 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
void mi_stats_merge(void)
Merge thread local statistics with the main statistics and reset.
void mi_option_set_default(mi_option_t option, long value)
void * mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false)
raise std::bad_alloc exception on failure.
+
void() mi_error_fun(int err, void *arg)
Type of error callback functions.
Definition: mimalloc-doc.h:381
void * mi_rezalloc(void *p, size_t newsize)
-
Eagerly commit segments (4MiB) (enabled by default).
Definition: mimalloc-doc.h:730
+
Eagerly commit segments (4MiB) (enabled by default).
Definition: mimalloc-doc.h:754
void * mi_heap_zalloc(mi_heap_t *heap, size_t size)
Allocate zero-initialized in a specific heap.
void mi_option_set(mi_option_t option, long value)
-
Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
Definition: mimalloc-doc.h:731
+
Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
Definition: mimalloc-doc.h:755
void mi_cfree(void *p)
Just as free but also checks if the pointer p belongs to our heap.
void * mi_recalloc_aligned(void *p, size_t newcount, size_t size, size_t alignment)
-
Definition: mimalloc-doc.h:742
+
Definition: mimalloc-doc.h:766
void * mi_realloc_aligned_at(void *p, size_t newsize, size_t alignment, size_t offset)
-
void * blocks
start of the area containing heap blocks
Definition: mimalloc-doc.h:686
+
void * blocks
start of the area containing heap blocks
Definition: mimalloc-doc.h:710
+
void * mi_new_n(size_t count, size_t size) noexcept(false)
raise std::bad_alloc exception on failure or overflow.
void * mi_realloc_aligned(void *p, size_t newsize, size_t alignment)
int mi__posix_memalign(void **p, size_t alignment, size_t size)
void mi_free(void *p)
Free previously allocated memory.
@@ -141,36 +143,37 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
void * mi_heap_rezalloc_aligned_at(mi_heap_t *heap, void *p, size_t newsize, size_t alignment, size_t offset)
void * mi_zalloc(size_t size)
Allocate zero-initialized size bytes.
void * mi_heap_rezalloc(mi_heap_t *heap, void *p, size_t newsize)
-
The number of segments per thread to keep cached.
Definition: mimalloc-doc.h:734
+
The number of segments per thread to keep cached.
Definition: mimalloc-doc.h:758
void * mi_heap_calloc(mi_heap_t *heap, size_t count, size_t size)
Allocate count zero-initialized elements in a specific heap.
void * mi_new(std::size_t n) noexcept(false)
raise std::bad_alloc exception on failure.
void * mi_heap_calloc_aligned(mi_heap_t *heap, size_t count, size_t size, size_t alignment)
bool mi_is_redirected()
Is the C runtime malloc API redirected?
-
size_t block_size
size in bytes of one block
Definition: mimalloc-doc.h:690
+
size_t block_size
size in bytes of one block
Definition: mimalloc-doc.h:714
void * mi_reallocarray(void *p, size_t count, size_t size)
int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs)
Reserve pages of huge OS pages (1GiB) evenly divided over numa_nodes nodes, but stops after at most t...
void() mi_deferred_free_fun(bool force, unsigned long long heartbeat, void *arg)
Type of deferred free functions.
Definition: mimalloc-doc.h:342
bool mi_is_in_heap_region(const void *p)
Is a pointer part of our heap?
void mi_option_enable(mi_option_t option, bool enable)
void * mi_realloc(void *p, size_t newsize)
Re-allocate memory to newsize bytes.
-
The number of huge OS pages (1GiB in size) to reserve at the start of the program.
Definition: mimalloc-doc.h:733
+
The number of huge OS pages (1GiB in size) to reserve at the start of the program.
Definition: mimalloc-doc.h:757
void * mi_heap_reallocf(mi_heap_t *heap, void *p, size_t newsize)
void mi_free_size_aligned(void *p, size_t size, size_t alignment)
void * mi_rezalloc_aligned_at(void *p, size_t newsize, size_t alignment, size_t offset)
-
Reset page memory after mi_option_reset_delay milliseconds when it becomes free.
Definition: mimalloc-doc.h:735
+
Reset page memory after mi_option_reset_delay milliseconds when it becomes free.
Definition: mimalloc-doc.h:759
void mi_thread_done(void)
Uninitialize mimalloc on a thread.
bool mi_heap_visit_blocks(const mi_heap_t *heap, bool visit_all_blocks, mi_block_visit_fun *visitor, void *arg)
Visit all areas and blocks in a heap.
-
Pretend there are at most N NUMA nodes.
Definition: mimalloc-doc.h:738
+
Pretend there are at most N NUMA nodes.
Definition: mimalloc-doc.h:762
void * mi_malloc(size_t size)
Allocate size bytes.
bool mi_option_enabled(mi_option_t option)
-
Experimental.
Definition: mimalloc-doc.h:739
+
void mi_register_error(mi_error_fun *errfun, void *arg)
Register an error callback function.
+
Experimental.
Definition: mimalloc-doc.h:763
char * mi_heap_strndup(mi_heap_t *heap, const char *s, size_t n)
Duplicate a string of at most length n in a specific heap.
-
bool() mi_block_visit_fun(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *arg)
Visitor function passed to mi_heap_visit_blocks()
Definition: mimalloc-doc.h:700
+
bool() mi_block_visit_fun(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *arg)
Visitor function passed to mi_heap_visit_blocks()
Definition: mimalloc-doc.h:724
void * mi_heap_recalloc(mi_heap_t *heap, void *p, size_t newcount, size_t size)
void * mi_heap_malloc_aligned_at(mi_heap_t *heap, size_t size, size_t alignment, size_t offset)
char * mi_realpath(const char *fname, char *resolved_name)
Resolve a file path name.
-
Print error messages to stderr.
Definition: mimalloc-doc.h:727
-
Experimental.
Definition: mimalloc-doc.h:736
+
Print error messages to stderr.
Definition: mimalloc-doc.h:751
+
Experimental.
Definition: mimalloc-doc.h:760
void * mi_heap_rezalloc_aligned(mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
void * mi_memalign(size_t alignment, size_t size)
void * mi_new_aligned_nothrow(size_t n, size_t alignment)
return NULL on failure.
@@ -179,11 +182,11 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
bool mi_heap_contains_block(mi_heap_t *heap, const void *p)
Does a heap contain a pointer to a previously allocated block?
void mi_heap_collect(mi_heap_t *heap, bool force)
Release outstanding resources in a specific heap.
void * mi_heap_recalloc_aligned_at(mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
-
Print verbose messages to stderr.
Definition: mimalloc-doc.h:728
+
Print verbose messages to stderr.
Definition: mimalloc-doc.h:752
void * mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset)
void * mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset)
Allocate size bytes aligned by alignment at a specified offset.
void mi_heap_delete(mi_heap_t *heap)
Delete a previously allocated heap.
-
OS tag to assign to mimalloc'd memory.
Definition: mimalloc-doc.h:741
+
OS tag to assign to mimalloc'd memory.
Definition: mimalloc-doc.h:765
mi_heap_t * mi_heap_get_default()
Get the default heap that is used for mi_malloc() et al.
int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs)
Reserve pages of huge OS pages (1GiB) at a specific numa_node, but stops after at most timeout_msecs ...
void * mi_aligned_alloc(size_t alignment, size_t size)
@@ -191,22 +194,22 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
void mi_thread_init(void)
Initialize mimalloc on a thread.
size_t mi_good_size(size_t size)
Return the used allocation size.
void mi_stats_print(void *out)
Print the main statistics.
-
Experimental.
Definition: mimalloc-doc.h:740
+
Experimental.
Definition: mimalloc-doc.h:764
void * mi_heap_recalloc_aligned(mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment)
void * mi_heap_mallocn(mi_heap_t *heap, size_t count, size_t size)
Allocate count elements in a specific heap.
-
An area of heap space contains blocks of a single size.
Definition: mimalloc-doc.h:685
+
An area of heap space contains blocks of a single size.
Definition: mimalloc-doc.h:709
void mi_thread_stats_print_out(mi_output_fun *out, void *arg)
Print out heap statistics for this thread.
-
Print statistics to stderr when the program is done.
Definition: mimalloc-doc.h:726
+
Print statistics to stderr when the program is done.
Definition: mimalloc-doc.h:750
void * mi_zalloc_aligned(size_t size, size_t alignment)
-
size_t reserved
bytes reserved for this area
Definition: mimalloc-doc.h:687
-
struct mi_heap_s mi_heap_t
Type of first-class heaps.
Definition: mimalloc-doc.h:481
-
size_t used
bytes in use by allocated blocks
Definition: mimalloc-doc.h:689
+
size_t reserved
bytes reserved for this area
Definition: mimalloc-doc.h:711
+
struct mi_heap_s mi_heap_t
Type of first-class heaps.
Definition: mimalloc-doc.h:505
+
size_t used
bytes in use by allocated blocks
Definition: mimalloc-doc.h:713
void mi_register_deferred_free(mi_deferred_free_fun *deferred_free, void *arg)
Register a deferred free function.
void mi_free_size(void *p, size_t size)
void mi_collect(bool force)
Eagerly free memory.
void mi_heap_destroy(mi_heap_t *heap)
Destroy a heap, freeing all its still allocated blocks.
void * mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset)
-
Use large OS pages (2MiB in size) if possible.
Definition: mimalloc-doc.h:732
+
Use large OS pages (2MiB in size) if possible.
Definition: mimalloc-doc.h:756
void * mi_heap_reallocn(mi_heap_t *heap, void *p, size_t count, size_t size)
void mi_register_output(mi_output_fun *out, void *arg)
Register an output function.
void * mi_heap_malloc_small(mi_heap_t *heap, size_t size)
Allocate a small object in a specific heap.
@@ -224,11 +227,11 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
long mi_option_get(mi_option_t option)
mi_heap_t * mi_heap_get_backing()
Get the backing heap.
void mi_free_aligned(void *p, size_t alignment)
-
Delay in milli-seconds before resetting a page (100ms by default)
Definition: mimalloc-doc.h:737
+
Delay in milli-seconds before resetting a page (100ms by default)
Definition: mimalloc-doc.h:761
mi_heap_t * mi_heap_new()
Create a new heap that can be used for allocation.
void * mi_heap_malloc(mi_heap_t *heap, size_t size)
Allocate in a specific heap.
-
size_t committed
current committed bytes of this area
Definition: mimalloc-doc.h:688
-
mi_option_t
Runtime options.
Definition: mimalloc-doc.h:724
+
size_t committed
current committed bytes of this area
Definition: mimalloc-doc.h:712
+
mi_option_t
Runtime options.
Definition: mimalloc-doc.h:748
bool mi_heap_check_owned(mi_heap_t *heap, const void *p)
Check safely if any pointer is part of a heap.
mi_heap_t * mi_heap_set_default(mi_heap_t *heap)
Set the default heap to use for mi_malloc() et al.
diff --git a/docs/navtreeindex0.js b/docs/navtreeindex0.js index d1b0e072..e2667728 100644 --- a/docs/navtreeindex0.js +++ b/docs/navtreeindex0.js @@ -29,27 +29,29 @@ var NAVTREEINDEX0 = "group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65":[5,6,1], "group__analysis.html#structmi__heap__area__t":[5,6,0], "group__extended.html":[5,1], -"group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee":[5,1,19], -"group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf":[5,1,16], +"group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee":[5,1,21], +"group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf":[5,1,18], "group__extended.html#ga1ea64283508718d9d645c38efc2f4305":[5,1,0], -"group__extended.html#ga220f29f40a44404b0061c15bc1c31152":[5,1,20], -"group__extended.html#ga256cc6f13a142deabbadd954a217e228":[5,1,14], +"group__extended.html#ga220f29f40a44404b0061c15bc1c31152":[5,1,22], +"group__extended.html#ga251d369cda3f1c2a955c555486ed90e5":[5,1,2], +"group__extended.html#ga256cc6f13a142deabbadd954a217e228":[5,1,16], "group__extended.html#ga299dae78d25ce112e384a98b7309c5be":[5,1,1], -"group__extended.html#ga2d126e5c62d3badc35445e5d84166df2":[5,1,13], -"group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50":[5,1,11], -"group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece":[5,1,8], -"group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99":[5,1,15], -"group__extended.html#ga421430e2226d7d468529cec457396756":[5,1,3], -"group__extended.html#ga5f071b10d4df1c3658e04e7fd67a94e6":[5,1,5], -"group__extended.html#ga7136c2e55cb22c98ecf95d08d6debb99":[5,1,7], -"group__extended.html#ga7795a13d20087447281858d2c771cca1":[5,1,10], -"group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1":[5,1,12], -"group__extended.html#gaad25050b19f30cd79397b227e0157a3f":[5,1,6], -"group__extended.html#gab1dac8476c46cb9eecab767eb40c1525":[5,1,18], -"group__extended.html#gac057927cd06c854b45fe7847e921bd47":[5,1,4], -"group__extended.html#gad823d23444a4b77a40f66bf075a98a0c":[5,1,2], 
-"group__extended.html#gae5b17ff027cd2150b43a33040250cf3f":[5,1,9], -"group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17":[5,1,17], +"group__extended.html#ga2d126e5c62d3badc35445e5d84166df2":[5,1,15], +"group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50":[5,1,13], +"group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece":[5,1,9], +"group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99":[5,1,17], +"group__extended.html#ga421430e2226d7d468529cec457396756":[5,1,4], +"group__extended.html#ga5f071b10d4df1c3658e04e7fd67a94e6":[5,1,6], +"group__extended.html#ga7136c2e55cb22c98ecf95d08d6debb99":[5,1,8], +"group__extended.html#ga7795a13d20087447281858d2c771cca1":[5,1,12], +"group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1":[5,1,14], +"group__extended.html#gaa1d55e0e894be240827e5d87ec3a1f45":[5,1,10], +"group__extended.html#gaad25050b19f30cd79397b227e0157a3f":[5,1,7], +"group__extended.html#gab1dac8476c46cb9eecab767eb40c1525":[5,1,20], +"group__extended.html#gac057927cd06c854b45fe7847e921bd47":[5,1,5], +"group__extended.html#gad823d23444a4b77a40f66bf075a98a0c":[5,1,3], +"group__extended.html#gae5b17ff027cd2150b43a33040250cf3f":[5,1,11], +"group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17":[5,1,19], "group__heap.html":[5,3], "group__heap.html#ga00e95ba1e01acac3cfd95bb7a357a6f0":[5,3,20], "group__heap.html#ga08ca6419a5c057a4d965868998eef487":[5,3,3], @@ -121,18 +123,19 @@ var NAVTREEINDEX0 = "group__posix.html#ga0d28d5cf61e6bfbb18c63092939fe5c9":[5,8,3], "group__posix.html#ga1326d2e4388630b5f81ca7206318b8e5":[5,8,1], "group__posix.html#ga4531c9e775bb3ae12db57c1ba8a5d7de":[5,8,6], -"group__posix.html#ga48fad8648a2f1dab9c87ea9448a52088":[5,8,15], +"group__posix.html#ga48fad8648a2f1dab9c87ea9448a52088":[5,8,16], "group__posix.html#ga705dc7a64bffacfeeb0141501a5c35d7":[5,8,2], "group__posix.html#ga72e9d7ffb5fe94d69bc722c8506e27bc":[5,8,5], -"group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b":[5,8,16], 
+"group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b":[5,8,17], "group__posix.html#gaab7fa71ea93b96873f5d9883db57d40e":[5,8,8], "group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545":[5,8,9], "group__posix.html#gab5e29558926d934c3f1cae8c815f942c":[5,8,11], -"group__posix.html#gacff84f226ba9feb2031b8992e5579447":[5,8,13], +"group__posix.html#gacff84f226ba9feb2031b8992e5579447":[5,8,14], "group__posix.html#gad5a69c8fea96aa2b7a7c818c2130090a":[5,8,0], "group__posix.html#gae01389eedab8d67341ff52e2aad80ebb":[5,8,4], -"group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a":[5,8,12], -"group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e":[5,8,14], +"group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81":[5,8,12], +"group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a":[5,8,13], +"group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e":[5,8,15], "group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3":[5,8,10], "group__typed.html":[5,5], "group__typed.html#ga0619a62c5fd886f1016030abe91f0557":[5,5,7], diff --git a/docs/search/all_6.js b/docs/search/all_6.js index cc7a26ec..7af11c0f 100644 --- a/docs/search/all_6.js +++ b/docs/search/all_6.js @@ -11,6 +11,7 @@ var searchData= ['mi_5fcheck_5fowned',['mi_check_owned',['../group__analysis.html#ga628c237489c2679af84a4d0d143b3dd5',1,'mimalloc-doc.h']]], ['mi_5fcollect',['mi_collect',['../group__extended.html#ga421430e2226d7d468529cec457396756',1,'mimalloc-doc.h']]], ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga299dae78d25ce112e384a98b7309c5be',1,'mimalloc-doc.h']]], + ['mi_5ferror_5ffun',['mi_error_fun',['../group__extended.html#ga251d369cda3f1c2a955c555486ed90e5',1,'mimalloc-doc.h']]], ['mi_5fexpand',['mi_expand',['../group__malloc.html#gaaee66a1d483c3e28f585525fb96707e4',1,'mimalloc-doc.h']]], ['mi_5ffree',['mi_free',['../group__malloc.html#gaf2c7b89c327d1f60f59e68b9ea644d95',1,'mimalloc-doc.h']]], 
['mi_5ffree_5faligned',['mi_free_aligned',['../group__posix.html#ga0d28d5cf61e6bfbb18c63092939fe5c9',1,'mimalloc-doc.h']]], @@ -75,6 +76,7 @@ var searchData= ['mi_5fnew',['mi_new',['../group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], ['mi_5fnew_5faligned',['mi_new_aligned',['../group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__posix.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fn',['mi_new_n',['../group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81',1,'mimalloc-doc.h']]], ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit',['mi_option_eager_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca1e8de72c93da7ff22d91e1e27b52ac2b',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit_5fdelay',['mi_option_eager_commit_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c',1,'mimalloc-doc.h']]], @@ -113,6 +115,7 @@ var searchData= ['mi_5frecalloc_5faligned',['mi_recalloc_aligned',['../group__zeroinit.html#ga3e7e5c291acf1c7fd7ffd9914a9f945f',1,'mimalloc-doc.h']]], ['mi_5frecalloc_5faligned_5fat',['mi_recalloc_aligned_at',['../group__zeroinit.html#ga4ff5e92ad73585418a072c9d059e5cf9',1,'mimalloc-doc.h']]], ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece',1,'mimalloc-doc.h']]], + ['mi_5fregister_5ferror',['mi_register_error',['../group__extended.html#gaa1d55e0e894be240827e5d87ec3a1f45',1,'mimalloc-doc.h']]], ['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#gae5b17ff027cd2150b43a33040250cf3f',1,'mimalloc-doc.h']]], 
['mi_5freserve_5fhuge_5fos_5fpages_5fat',['mi_reserve_huge_os_pages_at',['../group__extended.html#ga7795a13d20087447281858d2c771cca1',1,'mimalloc-doc.h']]], ['mi_5freserve_5fhuge_5fos_5fpages_5finterleave',['mi_reserve_huge_os_pages_interleave',['../group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50',1,'mimalloc-doc.h']]], diff --git a/docs/search/functions_0.js b/docs/search/functions_0.js index d1d209a1..098041bb 100644 --- a/docs/search/functions_0.js +++ b/docs/search/functions_0.js @@ -62,6 +62,7 @@ var searchData= ['mi_5fnew',['mi_new',['../group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], ['mi_5fnew_5faligned',['mi_new_aligned',['../group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__posix.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fn',['mi_new_n',['../group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81',1,'mimalloc-doc.h']]], ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], ['mi_5foption_5fenable',['mi_option_enable',['../group__options.html#ga6d45a20a3131f18bc351b69763b38ce4',1,'mimalloc-doc.h']]], ['mi_5foption_5fenable_5fdefault',['mi_option_enable_default',['../group__options.html#ga37988264b915a7db92530cc02d5494cb',1,'mimalloc-doc.h']]], @@ -82,6 +83,7 @@ var searchData= ['mi_5frecalloc_5faligned',['mi_recalloc_aligned',['../group__zeroinit.html#ga3e7e5c291acf1c7fd7ffd9914a9f945f',1,'mimalloc-doc.h']]], ['mi_5frecalloc_5faligned_5fat',['mi_recalloc_aligned_at',['../group__zeroinit.html#ga4ff5e92ad73585418a072c9d059e5cf9',1,'mimalloc-doc.h']]], ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece',1,'mimalloc-doc.h']]], + ['mi_5fregister_5ferror',['mi_register_error',['../group__extended.html#gaa1d55e0e894be240827e5d87ec3a1f45',1,'mimalloc-doc.h']]], 
['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#gae5b17ff027cd2150b43a33040250cf3f',1,'mimalloc-doc.h']]], ['mi_5freserve_5fhuge_5fos_5fpages_5fat',['mi_reserve_huge_os_pages_at',['../group__extended.html#ga7795a13d20087447281858d2c771cca1',1,'mimalloc-doc.h']]], ['mi_5freserve_5fhuge_5fos_5fpages_5finterleave',['mi_reserve_huge_os_pages_interleave',['../group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50',1,'mimalloc-doc.h']]], diff --git a/docs/search/typedefs_0.js b/docs/search/typedefs_0.js index 17816828..44a0a6c6 100644 --- a/docs/search/typedefs_0.js +++ b/docs/search/typedefs_0.js @@ -2,6 +2,7 @@ var searchData= [ ['mi_5fblock_5fvisit_5ffun',['mi_block_visit_fun',['../group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65',1,'mimalloc-doc.h']]], ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga299dae78d25ce112e384a98b7309c5be',1,'mimalloc-doc.h']]], + ['mi_5ferror_5ffun',['mi_error_fun',['../group__extended.html#ga251d369cda3f1c2a955c555486ed90e5',1,'mimalloc-doc.h']]], ['mi_5fheap_5ft',['mi_heap_t',['../group__heap.html#ga34a47cde5a5b38c29f1aa3c5e76943c2',1,'mimalloc-doc.h']]], ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#gad823d23444a4b77a40f66bf075a98a0c',1,'mimalloc-doc.h']]] ]; diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index f039fc50..eaa327be 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -23,25 +23,21 @@ terms of the MIT license. 
A copy of the license can be found in the file #if defined(_MSC_VER) #pragma warning(disable:4127) // constant conditional due to MI_SECURE paths #define mi_decl_noinline __declspec(noinline) -#define mi_attr_noreturn #elif defined(__GNUC__) || defined(__clang__) #define mi_decl_noinline __attribute__((noinline)) -#define mi_attr_noreturn __attribute__((noreturn)) #else #define mi_decl_noinline -#define mi_attr_noreturn #endif // "options.c" void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...); -void _mi_error_message(const char* fmt, ...); void _mi_warning_message(const char* fmt, ...); void _mi_verbose_message(const char* fmt, ...); void _mi_trace_message(const char* fmt, ...); void _mi_options_init(void); -void _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn; +void _mi_error_message(int err, const char* fmt, ...); // random.c void _mi_random_init(mi_random_ctx_t* ctx); @@ -146,6 +142,29 @@ bool _mi_page_is_valid(mi_page_t* page); #endif +/* ----------------------------------------------------------- + Error codes passed to `_mi_fatal_error` + All are recoverable but EFAULT is a serious error and aborts by default in secure mode. 
+ For portability define undefined error codes using common Unix codes: + +----------------------------------------------------------- */ +#include +#ifndef EAGAIN // double free +#define EAGAIN (11) +#endif +#ifndef ENOMEM // out of memory +#define ENOMEM (12) +#endif +#ifndef EFAULT // corrupted free-list or meta-data +#define EFAULT (14) +#endif +#ifndef EINVAL // trying to free an invalid pointer +#define EINVAL (22) +#endif +#ifndef EOVERFLOW // count*size overflow +#define EOVERFLOW (75) +#endif + /* ----------------------------------------------------------- Inlined definitions @@ -166,30 +185,6 @@ bool _mi_page_is_valid(mi_page_t* page); #define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) -// Overflow detecting multiply -static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { - // quick check for the case where count is one (common for C++ allocators) - if (count==1) { - *total = size; - return false; - } -#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 -#include // UINT_MAX, ULONG_MAX -#if (SIZE_MAX == UINT_MAX) - return __builtin_umul_overflow(count, size, total); -#elif (SIZE_MAX == ULONG_MAX) - return __builtin_umull_overflow(count, size, total); -#else - return __builtin_umulll_overflow(count, size, total); -#endif -#else /* __builtin_umul_overflow is unavailable */ - #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) - *total = count * size; - return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) - && size > 0 && (SIZE_MAX / size) < count); -#endif -} - // Is `x` a power of two? 
(0 is considered a power of two) static inline bool _mi_is_power_of_two(uintptr_t x) { return ((x & (x - 1)) == 0); @@ -229,6 +224,40 @@ static inline size_t _mi_wsize_from_size(size_t size) { } +// Overflow detecting multiply +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { +#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 +#include // UINT_MAX, ULONG_MAX +#if (SIZE_MAX == UINT_MAX) + return __builtin_umul_overflow(count, size, total); +#elif (SIZE_MAX == ULONG_MAX) + return __builtin_umull_overflow(count, size, total); +#else + return __builtin_umulll_overflow(count, size, total); +#endif +#else /* __builtin_umul_overflow is unavailable */ + #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) + *total = count * size; + return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) + && size > 0 && (SIZE_MAX / size) < count); +#endif +} + +// Safe multiply `count*size` into `total`; return `true` on overflow. +static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) { + if (count==1) { // quick check for the case where count is one (common for C++ allocators) + *total = size; + return false; + } + else if (mi_unlikely(mi_mul_overflow(count, size, total))) { + _mi_error_message(EOVERFLOW, "allocation request too large (%zu * %zu bytes)\n", count, size); + *total = SIZE_MAX; + return true; + } + else return false; +} + + /* ----------------------------------------------------------- The thread local default heap ----------------------------------------------------------- */ @@ -506,7 +535,7 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? 
if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { - _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); + _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); next = NULL; } return next; diff --git a/include/mimalloc.h b/include/mimalloc.h index 67ff1a35..1250314c 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -104,16 +104,23 @@ mi_decl_export mi_decl_allocator void* mi_mallocn(size_t count, size_t size) mi_decl_export mi_decl_allocator void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); - mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; + +// ------------------------------------------------------ +// Internals +// ------------------------------------------------------ + typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept; typedef void (mi_cdecl mi_output_fun)(const char* msg, void* arg); mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept; +typedef void (mi_cdecl mi_error_fun)(int err, void* arg); +mi_decl_export void mi_register_error(mi_error_fun* fun, void* arg); + mi_decl_export void mi_collect(bool force) mi_attr_noexcept; mi_decl_export int mi_version(void) mi_attr_noexcept; mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; @@ -143,9 +150,9 @@ mi_decl_export mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsiz mi_decl_export mi_decl_allocator void* mi_realloc_aligned_at(void* p, 
size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -// ------------------------------------------------------ -// Heaps -// ------------------------------------------------------ +// ------------------------------------------------------------------------------------- +// Heaps: first-class, but can only allocate from the same thread that created it. +// ------------------------------------------------------------------------------------- struct mi_heap_s; typedef struct mi_heap_s mi_heap_t; diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 5a59a63a..55b0e041 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -79,7 +79,7 @@ mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, siz mi_decl_allocator void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(count, size, &total)) return NULL; + if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_zalloc_aligned_at(heap, total, alignment, offset); } @@ -168,13 +168,13 @@ mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_ mi_decl_allocator void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(newcount, size, &total)) return NULL; + if (mi_count_size_overflow(newcount, size, &total)) return NULL; return mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset); } mi_decl_allocator void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(newcount, size, &total)) return NULL; + if (mi_count_size_overflow(newcount, size, &total)) return NULL; return mi_heap_rezalloc_aligned(heap, p, total, alignment); } diff --git a/src/alloc.c b/src/alloc.c 
index 37d43d9f..e605c017 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -146,7 +146,7 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con mi_list_contains(page, page->local_free, block) || mi_list_contains(page, mi_page_thread_free(page), block)) { - _mi_fatal_error("double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); + _mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); return true; } return false; @@ -300,7 +300,7 @@ void mi_free(void* p) mi_attr_noexcept { #if (MI_DEBUG>0) if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) { - _mi_error_message("trying to free an invalid (unaligned) pointer: %p\n", p); + _mi_error_message(EINVAL, "trying to free an invalid (unaligned) pointer: %p\n", p); return; } #endif @@ -310,16 +310,16 @@ void mi_free(void* p) mi_attr_noexcept #if (MI_DEBUG!=0) if (mi_unlikely(!mi_is_in_heap_region(p))) { - _mi_warning_message("possibly trying to free a pointer that does not point to a valid heap region: 0x%p\n" + _mi_warning_message("possibly trying to free a pointer that does not point to a valid heap region: %p\n" "(this may still be a valid very large allocation (over 64MiB))\n", p); if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) { - _mi_warning_message("(yes, the previous pointer 0x%p was valid after all)\n", p); + _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); } } #endif #if (MI_DEBUG!=0 || MI_SECURE>=4) if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { - _mi_error_message("trying to free a pointer that does not point to a valid heap space: %p\n", p); + _mi_error_message(EINVAL, "trying to free a pointer that does not point to a valid heap space: %p\n", p); return; } #endif @@ -432,7 +432,7 @@ void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { extern inline mi_decl_allocator void* mi_heap_calloc(mi_heap_t* heap, size_t count, 
size_t size) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(count,size,&total)) return NULL; + if (mi_count_size_overflow(count,size,&total)) return NULL; return mi_heap_zalloc(heap,total); } @@ -443,7 +443,7 @@ mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { // Uninitialized `calloc` extern mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(count, size, &total)) return NULL; + if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_malloc(heap, total); } @@ -484,7 +484,7 @@ mi_decl_allocator void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize mi_decl_allocator void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(count, size, &total)) return NULL; + if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_realloc(heap, p, total); } @@ -502,7 +502,7 @@ mi_decl_allocator void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsiz mi_decl_allocator void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(count, size, &total)) return NULL; + if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_rezalloc(heap, p, total); } @@ -570,7 +570,6 @@ char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { #define PATH_MAX MAX_PATH #endif #include -#include char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { // todo: use GetFullPathNameW to allow longer file names char buf[PATH_MAX]; @@ -645,10 +644,6 @@ static bool mi_try_new_handler(bool nothrow) { } } #else -#include -#ifndef ENOMEM -#define ENOMEM 12 -#endif typedef void (*std_new_handler_t)(); #if (defined(__GNUC__) || defined(__clang__)) @@ -668,7 +663,7 @@ std_new_handler_t mi_get_new_handler() { static bool 
mi_try_new_handler(bool nothrow) { std_new_handler_t h = mi_get_new_handler(); if (h==NULL) { - if (!nothrow) exit(ENOMEM); + if (!nothrow) exit(ENOMEM); // cannot throw in plain C, use exit as we are out of memory anyway. return false; } else { @@ -718,7 +713,7 @@ void* mi_new_aligned_nothrow(size_t size, size_t alignment) { void* mi_new_n(size_t count, size_t size) { size_t total; - if (mi_unlikely(mi_mul_overflow(count, size, &total))) { + if (mi_unlikely(mi_count_size_overflow(count, size, &total))) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc return NULL; } diff --git a/src/arena.c b/src/arena.c index 7f1a1caf..f20a03e9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -229,18 +229,18 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); mi_assert_internal(arena != NULL); if (arena == NULL) { - _mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); + _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx)); if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) { - _mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } const size_t blocks = mi_block_count_of_size(size); bool ones = mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); if (!ones) { - _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); + _mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size); return; }; } diff --git 
a/src/init.c b/src/init.c index b8422c2f..18a18f60 100644 --- a/src/init.c +++ b/src/init.c @@ -157,7 +157,7 @@ static bool _mi_heap_init(void) { // use `_mi_os_alloc` to allocate directly from the OS mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t),&_mi_stats_main); // Todo: more efficient allocation? if (td == NULL) { - _mi_error_message("failed to allocate thread local heap memory\n"); + _mi_error_message(ENOMEM, "failed to allocate thread local heap memory\n"); return false; } mi_tld_t* tld = &td->tld; diff --git a/src/options.c b/src/options.c index c12c77e0..b06cbdb4 100644 --- a/src/options.c +++ b/src/options.c @@ -287,14 +287,10 @@ void _mi_verbose_message(const char* fmt, ...) { va_end(args); } -void _mi_error_message(const char* fmt, ...) { +static void mi_show_error_message(const char* fmt, va_list args) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; if (mi_atomic_increment(&error_count) > mi_max_error_count) return; - va_list args; - va_start(args,fmt); - mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); - va_end(args); - mi_assert(false); + mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); } void _mi_warning_message(const char* fmt, ...) { @@ -314,14 +310,40 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, co } #endif -mi_attr_noreturn void _mi_fatal_error(const char* fmt, ...) 
{ +// -------------------------------------------------------- +// Errors +// -------------------------------------------------------- + +static mi_error_fun* volatile mi_error_handler; // = NULL +static volatile _Atomic(void*) mi_error_arg; // = NULL + +static void mi_error_default(int err) { + UNUSED(err); +#if (MI_SECURE>0) + if (err==EFAULT) { // abort on serious errors in secure mode (corrupted meta-data) + abort(); + } +#endif +} + +void mi_register_error(mi_error_fun* fun, void* arg) { + mi_error_handler = fun; // can be NULL + mi_atomic_write_ptr(&mi_error_arg, arg); +} + +void _mi_error_message(int err, const char* fmt, ...) { + // show detailed error message va_list args; va_start(args, fmt); - mi_vfprintf(NULL, NULL, "mimalloc: fatal: ", fmt, args); + mi_show_error_message(fmt, args); va_end(args); - #if (MI_SECURE>=0) - abort(); - #endif + // and call the error handler which may abort (or return normally) + if (mi_error_handler != NULL) { + mi_error_handler(err, mi_atomic_read_ptr(&mi_error_arg)); + } + else { + mi_error_default(err); + } } // -------------------------------------------------------- diff --git a/src/os.c b/src/os.c index b5bd0ad9..be507b69 100644 --- a/src/os.c +++ b/src/os.c @@ -13,7 +13,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-atomic.h" #include // strerror -#include + #if defined(_WIN32) #include @@ -655,7 +655,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ if (err != 0) { err = errno; } #endif if (err != 0) { - _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); + _mi_warning_message("%s error: start: %p, csize: 0x%x, err: %i\n", commit ? 
"commit" : "decommit", start, csize, err); mi_mprotect_hint(err); } mi_assert_internal(err == 0); @@ -719,7 +719,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) int err = madvise(start, csize, MADV_DONTNEED); #endif if (err != 0) { - _mi_warning_message("madvise reset error: start: 0x%p, csize: 0x%x, errno: %i\n", start, csize, errno); + _mi_warning_message("madvise reset error: start: %p, csize: 0x%x, errno: %i\n", start, csize, errno); } //mi_assert(err == 0); if (err != 0) return false; @@ -774,7 +774,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { if (err != 0) { err = errno; } #endif if (err != 0) { - _mi_warning_message("mprotect error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); + _mi_warning_message("mprotect error: start: %p, csize: 0x%x, err: %i\n", start, csize, err); mi_mprotect_hint(err); } return (err == 0); @@ -961,7 +961,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (p != addr) { // no success, issue a warning and break if (p != NULL) { - _mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr); + _mi_warning_message("could not allocate contiguous huge page %zu at %p\n", page, addr); _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); } break; diff --git a/src/page.c b/src/page.c index 84baf306..d67a44de 100644 --- a/src/page.c +++ b/src/page.c @@ -175,7 +175,7 @@ static void _mi_page_thread_free_collect(mi_page_t* page) } // if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free) if (count > max_count) { - _mi_fatal_error("corrupted thread-free list\n"); + _mi_error_message(EFAULT, "corrupted thread-free list\n"); return; // the thread-free items cannot be freed } @@ -796,7 +796,8 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_page_t* page; if (mi_unlikely(size > MI_LARGE_OBJ_SIZE_MAX)) { if (mi_unlikely(size > 
PTRDIFF_MAX)) { // we don't allocate more than PTRDIFF_MAX (see ) - page = NULL; + _mi_error_message(EOVERFLOW, "allocation request is too large (%zu b requested)\n", size); + return NULL; } else { page = mi_huge_page_alloc(heap,size); @@ -806,7 +807,10 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept // otherwise find a page with free blocks in our size segregated queues page = mi_find_free_page(heap,size); } - if (page == NULL) return NULL; // out of memory + if (mi_unlikely(page == NULL)) { // out of memory + _mi_error_message(ENOMEM, "cannot allocate memory (%zu bytes requested)\n", size); + return NULL; + } mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(mi_page_block_size(page) >= size); diff --git a/test/test-api.c b/test/test-api.c index 060efc44..68df314e 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -9,7 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file Testing allocators is difficult as bugs may only surface after particular allocation patterns. The main approach to testing _mimalloc_ is therefore to have extensive internal invariant checking (see `page_is_valid` in `page.c` -for example), which is enabled in debug mode with `-DMI_CHECK_FULL=ON`. +for example), which is enabled in debug mode with `-DMI_DEBUG_FULL=ON`. The main testing is then to run `mimalloc-bench` [1] using full invariant checking to catch any potential problems over a wide range of intensive allocation bench marks. 
@@ -88,6 +88,10 @@ int main() { CHECK_BODY("malloc-null",{ mi_free(NULL); }); + CHECK_BODY("calloc-overflow",{ + // use (size_t)&mi_calloc to get some number without triggering compiler warnings + result = (mi_calloc((size_t)&mi_calloc,SIZE_MAX/1000) == NULL); + }); // --------------------------------------------------- // Extended From 41e717c2e0bdb4e6c5cbd1fd8a0bae3b0afb46d2 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 18 Jan 2020 20:30:12 -0800 Subject: [PATCH 193/293] fix assertion in mi_block_zero_init (issue #194) --- src/alloc.c | 4 ++-- test/test-api.c | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index e605c017..8f98b647 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -92,9 +92,9 @@ extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept { void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { // note: we need to initialize the whole block to zero, not just size // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) - UNUSED(size); + UNUSED_RELEASE(size); mi_assert_internal(p != NULL); - mi_assert_internal(size > 0 && mi_page_block_size(page) >= size); + mi_assert_internal(size >= 0 && mi_page_block_size(page) >= size); mi_assert_internal(_mi_ptr_page(p)==page); if (page->is_zero) { // already zero initialized memory? 
diff --git a/test/test-api.c b/test/test-api.c index 68df314e..d7a7be59 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -92,6 +92,9 @@ int main() { // use (size_t)&mi_calloc to get some number without triggering compiler warnings result = (mi_calloc((size_t)&mi_calloc,SIZE_MAX/1000) == NULL); }); + CHECK_BODY("calloc0",{ + result = (mi_usable_size(mi_calloc(0,1000)) >= 0); + }); // --------------------------------------------------- // Extended From e8d7c80c74e7bcc789e2d2dd74e0306f5a0d9b8e Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 19 Jan 2020 17:33:36 -0800 Subject: [PATCH 194/293] fix build warnings on linux --- src/alloc.c | 18 +++++++++--------- test/test-api.c | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 8f98b647..7fc9023c 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -94,7 +94,7 @@ void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) UNUSED_RELEASE(size); mi_assert_internal(p != NULL); - mi_assert_internal(size >= 0 && mi_page_block_size(page) >= size); + mi_assert_internal(mi_page_block_size(page) >= size); // size can be zero mi_assert_internal(_mi_ptr_page(p)==page); if (page->is_zero) { // already zero initialized memory? @@ -141,7 +141,7 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or NULL). 
- // Walk the free lists to verify positively if it is already freed + // Walk the free lists to verify positively if it is already freed if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || mi_list_contains(page, mi_page_thread_free(page), block)) @@ -343,8 +343,8 @@ void mi_free(void* p) mi_attr_noexcept mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; - if (mi_unlikely(mi_page_all_free(page))) { - _mi_page_retire(page); + if (mi_unlikely(mi_page_all_free(page))) { + _mi_page_retire(page); } } else { @@ -695,8 +695,8 @@ void* mi_new_nothrow(size_t size) { void* mi_new_aligned(size_t size, size_t alignment) { void* p; - do { - p = mi_malloc_aligned(size, alignment); + do { + p = mi_malloc_aligned(size, alignment); } while(p == NULL && mi_try_new_handler(false)); return p; @@ -704,8 +704,8 @@ void* mi_new_aligned(size_t size, size_t alignment) { void* mi_new_aligned_nothrow(size_t size, size_t alignment) { void* p; - do { - p = mi_malloc_aligned(size, alignment); + do { + p = mi_malloc_aligned(size, alignment); } while(p == NULL && mi_try_new_handler(true)); return p; @@ -720,4 +720,4 @@ void* mi_new_n(size_t count, size_t size) { else { return mi_new(total); } -} \ No newline at end of file +} diff --git a/test/test-api.c b/test/test-api.c index d7a7be59..a837946f 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -93,7 +93,7 @@ int main() { result = (mi_calloc((size_t)&mi_calloc,SIZE_MAX/1000) == NULL); }); CHECK_BODY("calloc0",{ - result = (mi_usable_size(mi_calloc(0,1000)) >= 0); + result = (mi_usable_size(mi_calloc(0,1000)) <= 16); }); // --------------------------------------------------- From 9d7ac76d93c8995631bb7ed264406a12aa2564d2 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 19 Jan 2020 18:35:45 -0800 Subject: [PATCH 195/293] fix compilation under Intel C compiler (icc) --- CMakeLists.txt | 23 ++++++++++++++++------- include/mimalloc-atomic.h | 6 +++--- 
test/test-stress.c | 9 ++++++++- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 27729584..366ffc44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,7 +54,7 @@ endif() # Process options # ----------------------------------------------------------------------------- -if(CMAKE_C_COMPILER_ID MATCHES "MSVC") +if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel") set(MI_USE_CXX "ON") endif() @@ -96,25 +96,34 @@ endif() if(MI_USE_CXX MATCHES "ON") message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)") set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX ) - set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) + set_source_files_properties(src/static.c test/test-api.c test/test-stress PROPERTIES LANGUAGE CXX ) if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang|Clang") list(APPEND mi_cflags -Wno-deprecated) endif() + if(CMAKE_CXX_COMPILER_ID MATCHES "Intel") + list(APPEND mi_cflags -Kc++) + endif() endif() # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas) + if(CMAKE_C_COMPILER_ID MATCHES "GNU") + list(APPEND mi_cflags -Wno-invalid-memory-model) + list(APPEND mi_cflags -fvisibility=hidden) + endif() +endif() + +if(CMAKE_C_COMPILER_ID MATCHES "Intel") + list(APPEND mi_cflags -Wall -fvisibility=hidden) +endif() + +if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel") if(MI_LOCAL_DYNAMIC_TLS MATCHES "ON") list(APPEND mi_cflags -ftls-model=local-dynamic) else() list(APPEND mi_cflags -ftls-model=initial-exec) endif() - if(CMAKE_C_COMPILER_ID MATCHES "GNU") - list(APPEND mi_cflags -Wno-invalid-memory-model) - list(APPEND mi_cflags -fvisibility=hidden) - list(APPEND mi_cflags -fbranch-target-load-optimize) - endif() endif() # extra needed libraries diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index ecdfba0d..5d140f0c 100644 --- a/include/mimalloc-atomic.h +++ 
b/include/mimalloc-atomic.h @@ -9,7 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MIMALLOC_ATOMIC_H // ------------------------------------------------------ -// Atomics +// Atomics // We need to be portable between C, C++, and MSVC. // ------------------------------------------------------ @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // Atomic operations specialized for mimalloc // ------------------------------------------------------ -// Atomically add a 64-bit value; returns the previous value. +// Atomically add a 64-bit value; returns the previous value. // Note: not using _Atomic(int64_t) as it is only used for statistics. static inline void mi_atomic_add64(volatile int64_t* p, int64_t add); @@ -43,7 +43,7 @@ static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x); -// Atomically compare and exchange a value; returns `true` if successful. +// Atomically compare and exchange a value; returns `true` if successful. // May fail spuriously. Memory ordering as release on success, and relaxed on failure. 
// (Note: expected and desired are in opposite order from atomic_compare_exchange) static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); diff --git a/test/test-stress.c b/test/test-stress.c index 42628d7c..83f9b87b 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -255,7 +255,6 @@ static void* atomic_exchange_ptr(volatile void** p, void* newval) { #else #include -#include static void* thread_entry(void* param) { stress((uintptr_t)param); @@ -275,8 +274,16 @@ static void run_os_threads(size_t nthreads) { custom_free(threads); } +#ifdef __cplusplus +#include +static void* atomic_exchange_ptr(volatile void** p, void* newval) { + return std::atomic_exchange_explicit((volatile std::atomic*)p, newval, std::memory_order_acquire); +} +#else +#include static void* atomic_exchange_ptr(volatile void** p, void* newval) { return atomic_exchange_explicit((volatile _Atomic(void*)*)p, newval, memory_order_acquire); } +#endif #endif From 514b3152839d6a5524d64c7a00f88875c9d5ec3f Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 19 Jan 2020 21:27:46 -0800 Subject: [PATCH 196/293] add max_size member to STL allocator --- include/mimalloc.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/mimalloc.h b/include/mimalloc.h index 1250314c..add1c550 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -351,6 +351,7 @@ mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_at #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 #include // true_type +#include // PTRDIFF_MAX #endif template struct mi_stl_allocator { @@ -360,6 +361,7 @@ template struct mi_stl_allocator { using propagate_on_container_move_assignment = std::true_type; using propagate_on_container_swap = std::true_type; using is_always_equal = std::true_type; + size_t max_size() const noexcept { return (PTRDIFF_MAX / sizeof(value_type)); } #endif mi_stl_allocator() mi_attr_noexcept { } mi_stl_allocator(const 
mi_stl_allocator& ) mi_attr_noexcept { } From a33ebb8625fde438f61a5bddd0f71fa9adb7acb2 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 19 Jan 2020 22:14:35 -0800 Subject: [PATCH 197/293] add alloc_align attribute to aligned allocation functions --- include/mimalloc.h | 48 +++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index add1c550..153e11c7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -37,32 +37,40 @@ terms of the MIT license. A copy of the license can be found in the file #else #define mi_decl_allocator __declspec(restrict) #endif + #define mi_cdecl __cdecl #define mi_decl_thread __declspec(thread) #define mi_attr_malloc #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) - #define mi_cdecl __cdecl + #define mi_attr_alloc_align(p) #elif defined(__GNUC__) || defined(__clang__) + #define mi_cdecl // leads to warnings... __attribute__((cdecl)) #define mi_decl_thread __thread #define mi_decl_export __attribute__((visibility("default"))) #define mi_decl_allocator #define mi_attr_malloc __attribute__((malloc)) - #if defined(__clang_major__) && (__clang_major__ < 4) + #if (defined(__clang_major__) && (__clang_major__ < 4)) || (__GNUC__ < 5) #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) + #define mi_attr_alloc_align(p) + #elif defined(__INTEL_COMPILER) + #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) + #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) + #define mi_attr_alloc_align(p) #else #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) + #define mi_attr_alloc_align(p) __attribute__((alloc_align(p))) #endif - #define mi_cdecl // leads to warnings... 
__attribute__((cdecl)) #else + #define mi_cdecl #define mi_decl_thread __thread #define mi_decl_export #define mi_decl_allocator #define mi_attr_malloc #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) - #define mi_cdecl + #define mi_attr_alloc_align(p) #endif // ------------------------------------------------------ @@ -140,13 +148,13 @@ mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_ // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. // ------------------------------------------------------------------------------------- -mi_decl_export mi_decl_allocator void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export mi_decl_allocator void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_export mi_decl_allocator void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export mi_decl_allocator void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export mi_decl_allocator void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_export mi_decl_allocator void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export mi_decl_allocator void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_export mi_decl_allocator void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3); mi_decl_export mi_decl_allocator void* mi_calloc_aligned_at(size_t count, size_t size, size_t 
alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); -mi_decl_export mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_export mi_decl_allocator void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); @@ -178,13 +186,13 @@ mi_decl_export char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noex mi_decl_export char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept; mi_decl_export char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept; -mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export 
mi_decl_allocator void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3) mi_attr_alloc_align(4); mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); -mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3) mi_attr_alloc_align(4); mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); @@ -198,17 +206,17 @@ mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned_at(mi_heap_t* hea mi_decl_export mi_decl_allocator void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); mi_decl_export mi_decl_allocator void* mi_recalloc(void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); -mi_decl_export mi_decl_allocator void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_export mi_decl_allocator void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) 
mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); +mi_decl_export mi_decl_allocator void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3) mi_attr_alloc_align(4); mi_decl_export mi_decl_allocator void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); mi_decl_export mi_decl_allocator void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); mi_decl_export mi_decl_allocator void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4); -mi_decl_export mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_export mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3) mi_attr_alloc_align(4); mi_decl_export mi_decl_allocator void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); -mi_decl_export mi_decl_allocator void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4); +mi_decl_export mi_decl_allocator void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4) mi_attr_alloc_align(5); mi_decl_export mi_decl_allocator void* 
mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4); @@ -314,11 +322,11 @@ mi_decl_export void mi_cfree(void* p) mi_attr_noexcept; mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept; mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; -mi_decl_export void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); mi_decl_export void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); mi_decl_export void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept; @@ -335,9 +343,9 @@ mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; mi_decl_export void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); mi_decl_export void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1,2); -mi_decl_export void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_export void* mi_new_nothrow(size_t size) mi_attr_malloc 
mi_attr_alloc_size(1); -mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); #ifdef __cplusplus } From b77be05e4001debdcdcdc27d82bebb6b04faac11 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 20 Jan 2020 12:14:34 -0800 Subject: [PATCH 198/293] only collect retired at fresh page allocation --- src/page.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/page.c b/src/page.c index d67a44de..7840a590 100644 --- a/src/page.c +++ b/src/page.c @@ -234,7 +234,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(!page->is_reset); mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); - mi_page_queue_push(heap, pq, page); + mi_page_queue_push(heap, pq, page); mi_assert_expensive(_mi_page_is_valid(page)); } @@ -408,7 +408,7 @@ void _mi_page_retire(mi_page_t* page) { if (mi_likely(page->xblock_size <= MI_SMALL_SIZE_MAX && !mi_page_is_in_full(page))) { if (pq->last==page && pq->first==page) { // the only page in the queue? 
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = 4; + page->retire_expire = 16; mi_assert_internal(mi_page_all_free(page)); return; // dont't free after all } @@ -514,7 +514,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co mi_assert_internal(page->capacity + extend <= page->reserved); mi_assert_internal(bsize == mi_page_block_size(page)); void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); - + mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity); // initialize a sequential free list @@ -678,6 +678,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_stat_counter_increase(heap->tld->stats.searches, count); if (page == NULL) { + _mi_heap_collect_retired(heap, false); // perhaps make a page available page = mi_page_fresh(heap, pq); } else { @@ -686,8 +687,6 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p } mi_assert_internal(page == NULL || mi_page_immediate_available(page)); - // finally collect retired pages - _mi_heap_collect_retired(heap, false); return page; } From 146899af8aad7ee3b447f1d0ffe4939f8e3bcd88 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 20 Jan 2020 15:27:05 -0800 Subject: [PATCH 199/293] add missing members to stl allocator (#193) --- include/mimalloc.h | 51 +++++++++++++++++++++++++++++------------- test/main-override.cpp | 16 +++++++++++++ test/test-api.c | 8 +++---- 3 files changed, 56 insertions(+), 19 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 153e11c7..97c26e18 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -357,29 +357,50 @@ mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_at // --------------------------------------------------------------------------------------------- #ifdef __cplusplus +#include // std::numeric_limits #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // 
C++11 -#include // true_type -#include // PTRDIFF_MAX +#include // std::true_type +#include // std::forward #endif template struct mi_stl_allocator { - typedef T value_type; - #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 + typedef T value_type; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + typedef value_type& reference; + typedef value_type const& const_reference; + typedef value_type* pointer; + typedef value_type const* const_pointer; + template struct rebind { typedef mi_stl_allocator other; }; + + mi_stl_allocator() mi_attr_noexcept { } + mi_stl_allocator(const mi_stl_allocator&) mi_attr_noexcept { } + template mi_stl_allocator(const mi_stl_allocator&) mi_attr_noexcept { } + mi_stl_allocator select_on_container_copy_construction() const { return *this; } + void deallocate(T* p, size_type) { mi_free(p); } + + #if (__cplusplus >= 201703L) // C++17 + T* allocate(size_type count) { return static_cast(mi_new_n(count, sizeof(T))); } + T* allocate(size_type count, const void*) { return allocate(count); } + #else + pointer allocate(size_type count, const void* = 0) { return static_cast(mi_new_n(count, sizeof(value_type))); } + #endif + + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 using propagate_on_container_copy_assignment = std::true_type; using propagate_on_container_move_assignment = std::true_type; - using propagate_on_container_swap = std::true_type; - using is_always_equal = std::true_type; - size_t max_size() const noexcept { return (PTRDIFF_MAX / sizeof(value_type)); } - #endif - mi_stl_allocator() mi_attr_noexcept { } - mi_stl_allocator(const mi_stl_allocator& ) mi_attr_noexcept { } - template mi_stl_allocator(const mi_stl_allocator& ) mi_attr_noexcept { } - void deallocate(T* p, size_t /* count */) { mi_free(p); } - #if (__cplusplus >= 201703L) // C++17 - T* allocate(size_t count) { return (T*)mi_new_n(count, sizeof(T)); } + using propagate_on_container_swap = std::true_type; + using 
is_always_equal = std::true_type; + template void construct(U* p, Args&& ...args) { ::new(p) U(std::forward(args)...); } + template void destroy(U* p) mi_attr_noexcept { p->~U(); } #else - T* allocate(size_t count, const void* hint = 0) { (void)hint; return (T*)mi_new_n(count, sizeof(T)); } + void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } + void destroy(pointer p) { p->~value_type(); } #endif + + size_type max_size() const mi_attr_noexcept { return (std::numeric_limits::max() / sizeof(value_type)); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } }; template bool operator==(const mi_stl_allocator& , const mi_stl_allocator& ) mi_attr_noexcept { return true; } diff --git a/test/main-override.cpp b/test/main-override.cpp index f7a7f1bd..d082ade3 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -6,6 +6,7 @@ #include #include +#include static void* p = malloc(8); @@ -69,3 +70,18 @@ public: static Static s = Static(); +bool test_stl_allocator1() { + std::vector> vec; + vec.push_back(1); + vec.pop_back(); + return vec.size() == 0; +} + +bool test_stl_allocator2() { + struct some_struct { int i; int j; double z; }; + + std::vector> vec; + vec.push_back(some_struct()); + vec.pop_back(); + return vec.size() == 0; +} \ No newline at end of file diff --git a/test/test-api.c b/test/test-api.c index a837946f..95891754 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -202,7 +202,7 @@ bool test_heap2() { bool test_stl_allocator1() { #ifdef __cplusplus - std::vector> vec; + std::vector > vec; vec.push_back(1); vec.pop_back(); return vec.size() == 0; @@ -211,11 +211,11 @@ bool test_stl_allocator1() { #endif } +struct some_struct { int i; int j; double z; }; + bool test_stl_allocator2() { #ifdef __cplusplus - struct some_struct { int i; int j; double z; }; - - std::vector> vec; + std::vector > vec; vec.push_back(some_struct()); vec.pop_back(); return vec.size() 
== 0; From 3957b2fd28e95e9dcc787ccda320abee412ac82e Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 20 Jan 2020 15:41:56 -0800 Subject: [PATCH 200/293] add mi_new_realloc(n) to support C++ style reallocation that raises std::bad_alloc on out-of-memory --- include/mimalloc.h | 6 +++++- src/alloc.c | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 97c26e18..3861ad4f 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -341,11 +341,15 @@ mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept; mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept; mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; +// The `mi_new` wrappers implement C++ semantics on out-of-memory instead of directly returning `NULL`. +// (and call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception). mi_decl_export void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1,2); mi_decl_export void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_export void* mi_new_nothrow(size_t size) mi_attr_malloc mi_attr_alloc_size(1); mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_export void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1, 2); +mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); #ifdef __cplusplus } diff --git a/src/alloc.c b/src/alloc.c index 7fc9023c..20339204 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -721,3 +721,22 @@ void* mi_new_n(size_t 
count, size_t size) { return mi_new(total); } } + +void* mi_new_realloc(void* p, size_t newsize) { + void* q; + do { + q = mi_realloc(p, newsize); + } while (q == NULL && mi_try_new_handler(false)); + return q; +} + +void* mi_new_reallocn(void* p, size_t newcount, size_t size) { + size_t total; + if (mi_unlikely(mi_count_size_overflow(newcount, size, &total))) { + mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc + return NULL; + } + else { + return mi_new_realloc(p, total); + } +} From 5bc1c52ae6e83bc65c506d682bf732507b3e0f61 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 20 Jan 2020 17:34:29 -0800 Subject: [PATCH 201/293] update documentation --- doc/mimalloc-doc.h | 49 +++- docs/annotated.html | 1 + docs/annotated_dup.js | 3 +- docs/classes.html | 8 +- docs/group__cpp.html | 396 +++++++++++++++++++++++++++++++ docs/group__cpp.js | 11 + docs/group__posix.html | 169 ------------- docs/group__posix.js | 5 - docs/mimalloc-doc_8h_source.html | 15 +- docs/modules.html | 1 + docs/modules.js | 3 +- docs/navtreeindex0.js | 22 +- docs/search/all_3.js | 3 +- docs/search/all_6.js | 13 +- docs/search/classes_0.js | 3 +- docs/search/functions_0.js | 12 +- docs/search/groups_2.js | 2 +- docs/search/groups_3.js | 3 +- docs/search/groups_4.js | 3 +- docs/search/groups_5.js | 2 +- docs/search/groups_6.js | 2 +- docs/search/groups_7.js | 2 +- docs/search/groups_8.html | 30 +++ docs/search/groups_8.js | 4 + docs/search/searchdata.js | 2 +- docs/using.html | 2 +- test/main-override.cpp | 8 +- 27 files changed, 547 insertions(+), 227 deletions(-) create mode 100644 docs/group__cpp.html create mode 100644 docs/group__cpp.js create mode 100644 docs/search/groups_8.html create mode 100644 docs/search/groups_8.js diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index ca744e4c..3f24a623 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -74,6 +74,8 @@ Further information: - \ref typed - \ref analysis - \ref 
options +- \ref posix +- \ref cpp */ @@ -622,7 +624,10 @@ void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, siz /// \defgroup typed Typed Macros /// -/// Typed allocation macros +/// Typed allocation macros. For example: +/// ``` +/// int* p = mi_malloc_tp(int) +/// ``` /// /// \{ @@ -805,21 +810,51 @@ void mi_free_size(void* p, size_t size); void mi_free_size_aligned(void* p, size_t size, size_t alignment); void mi_free_aligned(void* p, size_t alignment); -/// raise `std::bad_alloc` exception on failure. +/// \} + +/// \defgroup cpp C++ wrappers +/// +/// `mi_` prefixed implementations of various allocation functions +/// that use C++ semantics on out-of-memory, generally calling +/// `std::get_new_handler` and raising a `std::bad_alloc` exception on failure. +/// +/// Note: use the `mimalloc-new-delete.h` header to override the \a new +/// and \a delete operators globally. The wrappers here are mostly +/// for convenience for library writers that need to interface with +/// mimalloc from C++. +/// +/// \{ + +/// like mi_malloc(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure. void* mi_new(std::size_t n) noexcept(false); -/// raise `std::bad_alloc` exception on failure or overflow. +/// like mi_mallocn(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure. void* mi_new_n(size_t count, size_t size) noexcept(false); -/// raise `std::bad_alloc` exception on failure. +/// like mi_malloc_aligned(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure. void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false); -/// return `NULL` on failure. +/// like `mi_malloc`, but when out of memory, use `std::get_new_handler` but return \a NULL on failure. void* mi_new_nothrow(size_t n); -`` -/// return `NULL` on failure. 
+ +/// like `mi_malloc_aligned`, but when out of memory, use `std::get_new_handler` but return \a NULL on failure. void* mi_new_aligned_nothrow(size_t n, size_t alignment); +/// like mi_realloc(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure. +void* mi_new_realloc(void* p, size_t newsize); + +/// like mi_reallocn(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure. +void* mi_new_reallocn(void* p, size_t newcount, size_t size); + +/// \a std::allocator implementation for mimalloc for use in STL containers. +/// For example: +/// ``` +/// std::vector > vec; +/// vec.push_back(1); +/// vec.pop_back(); +/// ``` +template struct mi_stl_allocator { } + /// \} /*! \page build Building diff --git a/docs/annotated.html b/docs/annotated.html index 4d2a8bcc..5120b803 100644 --- a/docs/annotated.html +++ b/docs/annotated.html @@ -105,6 +105,7 @@ $(document).ready(function(){initNavTree('annotated.html','');});
Here are the data structures with brief descriptions:
+
 Cmi_heap_area_tAn area of heap space contains blocks of a single size
 Cmi_stl_allocatorstd::allocator implementation for mimalloc for use in STL containers
diff --git a/docs/annotated_dup.js b/docs/annotated_dup.js index 6ed68bc3..67229123 100644 --- a/docs/annotated_dup.js +++ b/docs/annotated_dup.js @@ -1,4 +1,5 @@ var annotated_dup = [ - [ "mi_heap_area_t", "group__analysis.html#structmi__heap__area__t", "group__analysis_structmi__heap__area__t" ] + [ "mi_heap_area_t", "group__analysis.html#structmi__heap__area__t", "group__analysis_structmi__heap__area__t" ], + [ "mi_stl_allocator", "group__cpp.html#structmi__stl__allocator", null ] ]; \ No newline at end of file diff --git a/docs/classes.html b/docs/classes.html index e5ea3ea8..de960fb6 100644 --- a/docs/classes.html +++ b/docs/classes.html @@ -105,10 +105,10 @@ $(document).ready(function(){initNavTree('classes.html','');}); - - - + + + +
  m  
-
mi_heap_area_t   
mi_stl_allocator   
mi_heap_area_t   
diff --git a/docs/group__cpp.html b/docs/group__cpp.html new file mode 100644 index 00000000..caf758a8 --- /dev/null +++ b/docs/group__cpp.html @@ -0,0 +1,396 @@ + + + + + + + +mi-malloc: C++ wrappers + + + + + + + + + + + + + + + + +
+
+ + + + + + + + +
+
mi-malloc +  1.4 +
+
+ + + + + + +
+
+
+ + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+ +
+
C++ wrappers
+
+
+ +

mi_ prefixed implementations of various allocation functions that use C++ semantics on out-of-memory, generally calling std::get_new_handler and raising a std::bad_alloc exception on failure. +More...

+ + + + + +

+Data Structures

struct  mi_stl_allocator< T >
 std::allocator implementation for mimalloc for use in STL containers. More...
 
+ + + + + + + + + + + + + + + + + + + + + + +

+Functions

void * mi_new (std::size_t n) noexcept(false)
 like mi_malloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
void * mi_new_n (size_t count, size_t size) noexcept(false)
 like mi_mallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
void * mi_new_aligned (std::size_t n, std::align_val_t alignment) noexcept(false)
 like mi_malloc_aligned(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
void * mi_new_nothrow (size_t n)
 like mi_malloc, but when out of memory, use std::get_new_handler but return NULL on failure. More...
 
void * mi_new_aligned_nothrow (size_t n, size_t alignment)
 like mi_malloc_aligned, but when out of memory, use std::get_new_handler but return NULL on failure. More...
 
void * mi_new_realloc (void *p, size_t newsize)
 like mi_realloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
void * mi_new_reallocn (void *p, size_t newcount, size_t size)
 like mi_reallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
+

Detailed Description

+

mi_ prefixed implementations of various allocation functions that use C++ semantics on out-of-memory, generally calling std::get_new_handler and raising a std::bad_alloc exception on failure.

+

Note: use the mimalloc-new-delete.h header to override the new and delete operators globally. The wrappers here are mostly for convenience for library writers that need to interface with mimalloc from C++.

+

Data Structure Documentation

+ +

◆ mi_stl_allocator

+ +
+
+ + + + +
struct mi_stl_allocator
+
+

template<class T>
+struct mi_stl_allocator< T >

+ +

std::allocator implementation for mimalloc for use in STL containers.

+

For example:

std::vector<int, mi_stl_allocator<int> > vec;
vec.push_back(1);
vec.pop_back();
+
+
+

Function Documentation

+ +

◆ mi_new()

+ +
+
+ + + + + +
+ + + + + + + + +
void* mi_new (std::size_t n)
+
+noexcept
+
+ +

like mi_malloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.

+ +
+
+ +

◆ mi_new_aligned()

+ +
+
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
void* mi_new_aligned (std::size_t n,
std::align_val_t alignment 
)
+
+noexcept
+
+ +

like mi_malloc_aligned(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.

+ +
+
+ +

◆ mi_new_aligned_nothrow()

+ +
+
+ + + + + + + + + + + + + + + + + + +
void* mi_new_aligned_nothrow (size_t n,
size_t alignment 
)
+
+ +

like mi_malloc_aligned, but when out of memory, use std::get_new_handler but return NULL on failure.

+ +
+
+ +

◆ mi_new_n()

+ +
+
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
void* mi_new_n (size_t count,
size_t size 
)
+
+noexcept
+
+ +

like mi_mallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.

+ +
+
+ +

◆ mi_new_nothrow()

+ +
+
+ + + + + + + + +
void* mi_new_nothrow (size_t n)
+
+ +

like mi_malloc, but when out of memory, use std::get_new_handler but return NULL on failure.

+ +
+
+ +

◆ mi_new_realloc()

+ +
+
+ + + + + + + + + + + + + + + + + + +
void* mi_new_realloc (void * p,
size_t newsize 
)
+
+ +

like mi_realloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.

+ +
+
+ +

◆ mi_new_reallocn()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_new_reallocn (void * p,
size_t newcount,
size_t size 
)
+
+ +

like mi_reallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.

+ +
+
+
+
+ + + + diff --git a/docs/group__cpp.js b/docs/group__cpp.js new file mode 100644 index 00000000..20706646 --- /dev/null +++ b/docs/group__cpp.js @@ -0,0 +1,11 @@ +var group__cpp = +[ + [ "mi_stl_allocator", "group__cpp.html#structmi__stl__allocator", null ], + [ "mi_new", "group__cpp.html#gaad048a9fce3d02c5909cd05c6ec24545", null ], + [ "mi_new_aligned", "group__cpp.html#gaef2c2bdb4f70857902d3c8903ac095f3", null ], + [ "mi_new_aligned_nothrow", "group__cpp.html#gab5e29558926d934c3f1cae8c815f942c", null ], + [ "mi_new_n", "group__cpp.html#gae7bc4f56cd57ed3359060ff4f38bda81", null ], + [ "mi_new_nothrow", "group__cpp.html#gaeaded64eda71ed6b1d569d3e723abc4a", null ], + [ "mi_new_realloc", "group__cpp.html#gaab78a32f55149e9fbf432d5288e38e1e", null ], + [ "mi_new_reallocn", "group__cpp.html#ga756f4b2bc6a7ecd0a90baea8e90c7907", null ] +]; \ No newline at end of file diff --git a/docs/group__posix.html b/docs/group__posix.html index eaa4a10f..1aea8dc8 100644 --- a/docs/group__posix.html +++ b/docs/group__posix.html @@ -137,21 +137,6 @@ Functions   void mi_free_aligned (void *p, size_t alignment)   -void * mi_new (std::size_t n) noexcept(false) - raise std::bad_alloc exception on failure. More...
-  -void * mi_new_n (size_t count, size_t size) noexcept(false) - raise std::bad_alloc exception on failure or overflow. More...
-  -void * mi_new_aligned (std::size_t n, std::align_val_t alignment) noexcept(false) - raise std::bad_alloc exception on failure. More...
-  -void * mi_new_nothrow (size_t n) - return NULL on failure. More...
-  -void * mi_new_aligned_nothrow (size_t n, size_t alignment) - return NULL on failure. More...

Detailed Description

mi_ prefixed implementations of various Posix, Unix, and C++ allocation functions.

@@ -391,160 +376,6 @@ Functions
-
-
- -

◆ mi_new()

- -
-
- - - - - -
- - - - - - - - -
void* mi_new (std::size_t n)
-
-noexcept
-
- -

raise std::bad_alloc exception on failure.

- -
-
- -

◆ mi_new_aligned()

- -
-
- - - - - -
- - - - - - - - - - - - - - - - - - -
void* mi_new_aligned (std::size_t n,
std::align_val_t alignment 
)
-
-noexcept
-
- -

raise std::bad_alloc exception on failure.

- -
-
- -

◆ mi_new_aligned_nothrow()

- -
-
- - - - - - - - - - - - - - - - - - -
void* mi_new_aligned_nothrow (size_t n,
size_t alignment 
)
-
- -

return NULL on failure.

- -
-
- -

◆ mi_new_n()

- -
-
- - - - - -
- - - - - - - - - - - - - - - - - - -
void* mi_new_n (size_t count,
size_t size 
)
-
-noexcept
-
- -

raise std::bad_alloc exception on failure or overflow.

- -
-
- -

◆ mi_new_nothrow()

- -
-
- - - - - - - - -
void* mi_new_nothrow (size_t n)
-
- -

return NULL on failure.

-
diff --git a/docs/group__posix.js b/docs/group__posix.js index 0f2b895d..e43453d9 100644 --- a/docs/group__posix.js +++ b/docs/group__posix.js @@ -9,11 +9,6 @@ var group__posix = [ "mi_malloc_size", "group__posix.html#ga4531c9e775bb3ae12db57c1ba8a5d7de", null ], [ "mi_malloc_usable_size", "group__posix.html#ga06d07cf357bbac5c73ba5d0c0c421e17", null ], [ "mi_memalign", "group__posix.html#gaab7fa71ea93b96873f5d9883db57d40e", null ], - [ "mi_new", "group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545", null ], - [ "mi_new_aligned", "group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3", null ], - [ "mi_new_aligned_nothrow", "group__posix.html#gab5e29558926d934c3f1cae8c815f942c", null ], - [ "mi_new_n", "group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81", null ], - [ "mi_new_nothrow", "group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a", null ], [ "mi_posix_memalign", "group__posix.html#gacff84f226ba9feb2031b8992e5579447", null ], [ "mi_pvalloc", "group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e", null ], [ "mi_reallocarray", "group__posix.html#ga48fad8648a2f1dab9c87ea9448a52088", null ], diff --git a/docs/mimalloc-doc_8h_source.html b/docs/mimalloc-doc_8h_source.html index 12d0f799..f70ae81f 100644 --- a/docs/mimalloc-doc_8h_source.html +++ b/docs/mimalloc-doc_8h_source.html @@ -102,20 +102,22 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
mimalloc-doc.h
-
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 #error "documentation file only!"
9 
10 
81 
85 
89 void mi_free(void* p);
90 
95 void* mi_malloc(size_t size);
96 
101 void* mi_zalloc(size_t size);
102 
112 void* mi_calloc(size_t count, size_t size);
113 
126 void* mi_realloc(void* p, size_t newsize);
127 
138 void* mi_recalloc(void* p, size_t count, size_t size);
139 
153 void* mi_expand(void* p, size_t newsize);
154 
164 void* mi_mallocn(size_t count, size_t size);
165 
175 void* mi_reallocn(void* p, size_t count, size_t size);
176 
193 void* mi_reallocf(void* p, size_t newsize);
194 
195 
204 char* mi_strdup(const char* s);
205 
215 char* mi_strndup(const char* s, size_t n);
216 
229 char* mi_realpath(const char* fname, char* resolved_name);
230 
232 
233 // ------------------------------------------------------
234 // Extended functionality
235 // ------------------------------------------------------
236 
240 
243 #define MI_SMALL_SIZE_MAX (128*sizeof(void*))
244 
252 void* mi_malloc_small(size_t size);
253 
261 void* mi_zalloc_small(size_t size);
262 
277 size_t mi_usable_size(void* p);
278 
288 size_t mi_good_size(size_t size);
289 
297 void mi_collect(bool force);
298 
303 void mi_stats_print(void* out);
304 
310 void mi_stats_print(mi_output_fun* out, void* arg);
311 
313 void mi_stats_reset(void);
314 
316 void mi_stats_merge(void);
317 
321 void mi_thread_init(void);
322 
327 void mi_thread_done(void);
328 
334 void mi_thread_stats_print_out(mi_output_fun* out, void* arg);
335 
342 typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
343 
359 void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg);
360 
366 typedef void (mi_output_fun)(const char* msg, void* arg);
367 
374 void mi_register_output(mi_output_fun* out, void* arg);
375 
381 typedef void (mi_error_fun)(int err, void* arg);
382 
398 void mi_register_error(mi_error_fun* errfun, void* arg);
399 
404 bool mi_is_in_heap_region(const void* p);
405 
406 
419 int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs);
420 
433 int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs);
434 
435 
440 bool mi_is_redirected();
441 
442 
444 
445 // ------------------------------------------------------
446 // Aligned allocation
447 // ------------------------------------------------------
448 
454 
467 void* mi_malloc_aligned(size_t size, size_t alignment);
468 void* mi_zalloc_aligned(size_t size, size_t alignment);
469 void* mi_calloc_aligned(size_t count, size_t size, size_t alignment);
470 void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment);
471 
482 void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset);
483 void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset);
484 void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset);
485 void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
486 
488 
494 
499 struct mi_heap_s;
500 
505 typedef struct mi_heap_s mi_heap_t;
506 
509 
517 void mi_heap_delete(mi_heap_t* heap);
518 
526 void mi_heap_destroy(mi_heap_t* heap);
527 
532 
536 
543 
545 void mi_heap_collect(mi_heap_t* heap, bool force);
546 
549 void* mi_heap_malloc(mi_heap_t* heap, size_t size);
550 
554 void* mi_heap_malloc_small(mi_heap_t* heap, size_t size);
555 
558 void* mi_heap_zalloc(mi_heap_t* heap, size_t size);
559 
562 void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size);
563 
566 void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size);
567 
570 char* mi_heap_strdup(mi_heap_t* heap, const char* s);
571 
574 char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n);
575 
578 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name);
579 
580 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize);
581 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size);
582 void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize);
583 
584 void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
585 void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
586 void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
587 void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
588 void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment);
589 void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset);
590 void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
591 void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
592 
594 
595 
604 
605 void* mi_rezalloc(void* p, size_t newsize);
606 void* mi_recalloc(void* p, size_t newcount, size_t size) ;
607 
608 void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment);
609 void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
610 void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment);
611 void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
612 
613 void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize);
614 void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size);
615 
616 void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
617 void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
618 void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment);
619 void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
620 
622 
628 
640 #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
641 
643 #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
644 
646 #define mi_calloc_tp(tp,count) ((tp*)mi_calloc(count,sizeof(tp)))
647 
649 #define mi_mallocn_tp(tp,count) ((tp*)mi_mallocn(count,sizeof(tp)))
650 
652 #define mi_reallocn_tp(p,tp,count) ((tp*)mi_reallocn(p,count,sizeof(tp)))
653 
655 #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
656 
658 #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
659 
661 #define mi_heap_calloc_tp(hp,tp,count) ((tp*)mi_heap_calloc(hp,count,sizeof(tp)))
662 
664 #define mi_heap_mallocn_tp(hp,tp,count) ((tp*)mi_heap_mallocn(hp,count,sizeof(tp)))
665 
667 #define mi_heap_reallocn_tp(hp,p,tp,count) ((tp*)mi_heap_reallocn(p,count,sizeof(tp)))
668 
670 #define mi_heap_recalloc_tp(hp,p,tp,count) ((tp*)mi_heap_recalloc(p,count,sizeof(tp)))
671 
673 
679 
686 bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
687 
696 bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
697 
705 bool mi_check_owned(const void* p);
706 
709 typedef struct mi_heap_area_s {
710  void* blocks;
711  size_t reserved;
712  size_t committed;
713  size_t used;
714  size_t block_size;
716 
724 typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
725 
737 bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
738 
740 
746 
748 typedef enum mi_option_e {
749  // stable options
753  // the following options are experimental
767 } mi_option_t;
768 
769 
770 bool mi_option_enabled(mi_option_t option);
771 void mi_option_enable(mi_option_t option, bool enable);
772 void mi_option_enable_default(mi_option_t option, bool enable);
773 
774 long mi_option_get(mi_option_t option);
775 void mi_option_set(mi_option_t option, long value);
776 void mi_option_set_default(mi_option_t option, long value);
777 
778 
780 
787 
788 void* mi_recalloc(void* p, size_t count, size_t size);
789 size_t mi_malloc_size(const void* p);
790 size_t mi_malloc_usable_size(const void *p);
791 
793 void mi_cfree(void* p);
794 
795 int mi_posix_memalign(void** p, size_t alignment, size_t size);
796 int mi__posix_memalign(void** p, size_t alignment, size_t size);
797 void* mi_memalign(size_t alignment, size_t size);
798 void* mi_valloc(size_t size);
799 
800 void* mi_pvalloc(size_t size);
801 void* mi_aligned_alloc(size_t alignment, size_t size);
802 void* mi_reallocarray(void* p, size_t count, size_t size);
803 
804 void mi_free_size(void* p, size_t size);
805 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
806 void mi_free_aligned(void* p, size_t alignment);
807 
809 void* mi_new(std::size_t n) noexcept(false);
810 
812 void* mi_new_n(size_t count, size_t size) noexcept(false);
813 
815 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
816 
818 void* mi_new_nothrow(size_t n);
819 ``
821 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
822 
824 
void mi_option_enable_default(mi_option_t option, bool enable)
+
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 #error "documentation file only!"
9 
10 
81 
85 
89 void mi_free(void* p);
90 
95 void* mi_malloc(size_t size);
96 
101 void* mi_zalloc(size_t size);
102 
112 void* mi_calloc(size_t count, size_t size);
113 
126 void* mi_realloc(void* p, size_t newsize);
127 
138 void* mi_recalloc(void* p, size_t count, size_t size);
139 
153 void* mi_expand(void* p, size_t newsize);
154 
164 void* mi_mallocn(size_t count, size_t size);
165 
175 void* mi_reallocn(void* p, size_t count, size_t size);
176 
193 void* mi_reallocf(void* p, size_t newsize);
194 
195 
204 char* mi_strdup(const char* s);
205 
215 char* mi_strndup(const char* s, size_t n);
216 
229 char* mi_realpath(const char* fname, char* resolved_name);
230 
232 
233 // ------------------------------------------------------
234 // Extended functionality
235 // ------------------------------------------------------
236 
240 
243 #define MI_SMALL_SIZE_MAX (128*sizeof(void*))
244 
252 void* mi_malloc_small(size_t size);
253 
261 void* mi_zalloc_small(size_t size);
262 
277 size_t mi_usable_size(void* p);
278 
288 size_t mi_good_size(size_t size);
289 
297 void mi_collect(bool force);
298 
303 void mi_stats_print(void* out);
304 
310 void mi_stats_print(mi_output_fun* out, void* arg);
311 
313 void mi_stats_reset(void);
314 
316 void mi_stats_merge(void);
317 
321 void mi_thread_init(void);
322 
327 void mi_thread_done(void);
328 
334 void mi_thread_stats_print_out(mi_output_fun* out, void* arg);
335 
342 typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
343 
359 void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg);
360 
366 typedef void (mi_output_fun)(const char* msg, void* arg);
367 
374 void mi_register_output(mi_output_fun* out, void* arg);
375 
381 typedef void (mi_error_fun)(int err, void* arg);
382 
398 void mi_register_error(mi_error_fun* errfun, void* arg);
399 
404 bool mi_is_in_heap_region(const void* p);
405 
406 
419 int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs);
420 
433 int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs);
434 
435 
440 bool mi_is_redirected();
441 
442 
444 
445 // ------------------------------------------------------
446 // Aligned allocation
447 // ------------------------------------------------------
448 
454 
467 void* mi_malloc_aligned(size_t size, size_t alignment);
468 void* mi_zalloc_aligned(size_t size, size_t alignment);
469 void* mi_calloc_aligned(size_t count, size_t size, size_t alignment);
470 void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment);
471 
482 void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset);
483 void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset);
484 void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset);
485 void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
486 
488 
494 
499 struct mi_heap_s;
500 
505 typedef struct mi_heap_s mi_heap_t;
506 
509 
517 void mi_heap_delete(mi_heap_t* heap);
518 
526 void mi_heap_destroy(mi_heap_t* heap);
527 
532 
536 
543 
545 void mi_heap_collect(mi_heap_t* heap, bool force);
546 
549 void* mi_heap_malloc(mi_heap_t* heap, size_t size);
550 
554 void* mi_heap_malloc_small(mi_heap_t* heap, size_t size);
555 
558 void* mi_heap_zalloc(mi_heap_t* heap, size_t size);
559 
562 void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size);
563 
566 void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size);
567 
570 char* mi_heap_strdup(mi_heap_t* heap, const char* s);
571 
574 char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n);
575 
578 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name);
579 
580 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize);
581 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size);
582 void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize);
583 
584 void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
585 void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
586 void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
587 void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
588 void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment);
589 void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset);
590 void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
591 void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
592 
594 
595 
604 
605 void* mi_rezalloc(void* p, size_t newsize);
606 void* mi_recalloc(void* p, size_t newcount, size_t size) ;
607 
608 void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment);
609 void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
610 void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment);
611 void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
612 
613 void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize);
614 void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size);
615 
616 void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
617 void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
618 void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment);
619 void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
620 
622 
628 
640 #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
641 
643 #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
644 
646 #define mi_calloc_tp(tp,count) ((tp*)mi_calloc(count,sizeof(tp)))
647 
649 #define mi_mallocn_tp(tp,count) ((tp*)mi_mallocn(count,sizeof(tp)))
650 
652 #define mi_reallocn_tp(p,tp,count) ((tp*)mi_reallocn(p,count,sizeof(tp)))
653 
655 #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
656 
658 #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
659 
661 #define mi_heap_calloc_tp(hp,tp,count) ((tp*)mi_heap_calloc(hp,count,sizeof(tp)))
662 
664 #define mi_heap_mallocn_tp(hp,tp,count) ((tp*)mi_heap_mallocn(hp,count,sizeof(tp)))
665 
667 #define mi_heap_reallocn_tp(hp,p,tp,count) ((tp*)mi_heap_reallocn(p,count,sizeof(tp)))
668 
670 #define mi_heap_recalloc_tp(hp,p,tp,count) ((tp*)mi_heap_recalloc(p,count,sizeof(tp)))
671 
673 
679 
686 bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
687 
696 bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
697 
705 bool mi_check_owned(const void* p);
706 
709 typedef struct mi_heap_area_s {
710  void* blocks;
711  size_t reserved;
712  size_t committed;
713  size_t used;
714  size_t block_size;
716 
724 typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
725 
737 bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
738 
740 
746 
748 typedef enum mi_option_e {
749  // stable options
753  // the following options are experimental
767 } mi_option_t;
768 
769 
770 bool mi_option_enabled(mi_option_t option);
771 void mi_option_enable(mi_option_t option, bool enable);
772 void mi_option_enable_default(mi_option_t option, bool enable);
773 
774 long mi_option_get(mi_option_t option);
775 void mi_option_set(mi_option_t option, long value);
776 void mi_option_set_default(mi_option_t option, long value);
777 
778 
780 
787 
788 void* mi_recalloc(void* p, size_t count, size_t size);
789 size_t mi_malloc_size(const void* p);
790 size_t mi_malloc_usable_size(const void *p);
791 
793 void mi_cfree(void* p);
794 
795 int mi_posix_memalign(void** p, size_t alignment, size_t size);
796 int mi__posix_memalign(void** p, size_t alignment, size_t size);
797 void* mi_memalign(size_t alignment, size_t size);
798 void* mi_valloc(size_t size);
799 
800 void* mi_pvalloc(size_t size);
801 void* mi_aligned_alloc(size_t alignment, size_t size);
802 void* mi_reallocarray(void* p, size_t count, size_t size);
803 
804 void mi_free_size(void* p, size_t size);
805 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
806 void mi_free_aligned(void* p, size_t alignment);
807 
809 
822 
824 void* mi_new(std::size_t n) noexcept(false);
825 
827 void* mi_new_n(size_t count, size_t size) noexcept(false);
828 
830 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
831 
833 void* mi_new_nothrow(size_t n);
834 
836 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
837 
839 void* mi_new_realloc(void* p, size_t newsize);
840 
842 void* mi_new_reallocn(void* p, size_t newcount, size_t size);
843 
851 template<class T> struct mi_stl_allocator { }
852 
854 
void mi_option_enable_default(mi_option_t option, bool enable)
size_t mi_usable_size(void *p)
Return the available bytes in a memory block.
+
void * mi_new_nothrow(size_t n)
like mi_malloc, but when out of memory, use std::get_new_handler but return NULL on failure.
void * mi_reallocn(void *p, size_t count, size_t size)
Re-allocate memory to count elements of size bytes.
void * mi_malloc_aligned(size_t size, size_t alignment)
Allocate size bytes aligned by alignment.
void * mi_recalloc_aligned_at(void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
void mi_stats_reset(void)
Reset statistics.
void * mi_heap_realloc_aligned(mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
+
void * mi_new_realloc(void *p, size_t newsize)
like mi_realloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exceptio...
void * mi_recalloc(void *p, size_t count, size_t size)
Re-allocate memory to count elements of size bytes, with extra memory initialized to zero.
void * mi_mallocn(size_t count, size_t size)
Allocate count elements of size bytes.
size_t mi_malloc_size(const void *p)
int mi_posix_memalign(void **p, size_t alignment, size_t size)
void mi_stats_merge(void)
Merge thread local statistics with the main statistics and reset.
+
void * mi_new_n(size_t count, size_t size) noexcept(false)
like mi_mallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exceptio...
void mi_option_set_default(mi_option_t option, long value)
-
void * mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false)
raise std::bad_alloc exception on failure.
void() mi_error_fun(int err, void *arg)
Type of error callback functions.
Definition: mimalloc-doc.h:381
void * mi_rezalloc(void *p, size_t newsize)
Eagerly commit segments (4MiB) (enabled by default).
Definition: mimalloc-doc.h:754
@@ -127,7 +129,6 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
Definition: mimalloc-doc.h:766
void * mi_realloc_aligned_at(void *p, size_t newsize, size_t alignment, size_t offset)
void * blocks
start of the area containing heap blocks
Definition: mimalloc-doc.h:710
-
void * mi_new_n(size_t count, size_t size) noexcept(false)
raise std::bad_alloc exception on failure or overflow.
void * mi_realloc_aligned(void *p, size_t newsize, size_t alignment)
int mi__posix_memalign(void **p, size_t alignment, size_t size)
void mi_free(void *p)
Free previously allocated memory.
@@ -145,7 +146,6 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
void * mi_heap_rezalloc(mi_heap_t *heap, void *p, size_t newsize)
The number of segments per thread to keep cached.
Definition: mimalloc-doc.h:758
void * mi_heap_calloc(mi_heap_t *heap, size_t count, size_t size)
Allocate count zero-initialized elements in a specific heap.
-
void * mi_new(std::size_t n) noexcept(false)
raise std::bad_alloc exception on failure.
void * mi_heap_calloc_aligned(mi_heap_t *heap, size_t count, size_t size, size_t alignment)
bool mi_is_redirected()
Is the C runtime malloc API redirected?
size_t block_size
size in bytes of one block
Definition: mimalloc-doc.h:714
@@ -154,6 +154,7 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
void() mi_deferred_free_fun(bool force, unsigned long long heartbeat, void *arg)
Type of deferred free functions.
Definition: mimalloc-doc.h:342
bool mi_is_in_heap_region(const void *p)
Is a pointer part of our heap?
void mi_option_enable(mi_option_t option, bool enable)
+
void * mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false)
like mi_malloc_aligned(), but when out of memory, use std::get_new_handler and raise std::bad_alloc e...
void * mi_realloc(void *p, size_t newsize)
Re-allocate memory to newsize bytes.
The number of huge OS pages (1GiB in size) to reserve at the start of the program.
Definition: mimalloc-doc.h:757
void * mi_heap_reallocf(mi_heap_t *heap, void *p, size_t newsize)
@@ -175,9 +176,8 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
Print error messages to stderr.
Definition: mimalloc-doc.h:751
Experimental.
Definition: mimalloc-doc.h:760
void * mi_heap_rezalloc_aligned(mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
+
void * mi_new_aligned_nothrow(size_t n, size_t alignment)
like mi_malloc_aligned, but when out of memory, use std::get_new_handler but return NULL on failure.
void * mi_memalign(size_t alignment, size_t size)
-
void * mi_new_aligned_nothrow(size_t n, size_t alignment)
return NULL on failure.
-
void * mi_new_nothrow(size_t n)
return NULL on failure.
void * mi_rezalloc_aligned(void *p, size_t newsize, size_t alignment)
bool mi_heap_contains_block(mi_heap_t *heap, const void *p)
Does a heap contain a pointer to a previously allocated block?
void mi_heap_collect(mi_heap_t *heap, bool force)
Release outstanding resources in a specific heap.
@@ -207,11 +207,13 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
void mi_register_deferred_free(mi_deferred_free_fun *deferred_free, void *arg)
Register a deferred free function.
void mi_free_size(void *p, size_t size)
void mi_collect(bool force)
Eagerly free memory.
+
void * mi_new_reallocn(void *p, size_t newcount, size_t size)
like mi_reallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc excepti...
void mi_heap_destroy(mi_heap_t *heap)
Destroy a heap, freeing all its still allocated blocks.
void * mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset)
Use large OS pages (2MiB in size) if possible.
Definition: mimalloc-doc.h:756
void * mi_heap_reallocn(mi_heap_t *heap, void *p, size_t count, size_t size)
void mi_register_output(mi_output_fun *out, void *arg)
Register an output function.
+
std::allocator implementation for mimalloc for use in STL containers.
Definition: mimalloc-doc.h:851
void * mi_heap_malloc_small(mi_heap_t *heap, size_t size)
Allocate a small object in a specific heap.
void * mi_heap_realloc(mi_heap_t *heap, void *p, size_t newsize)
size_t mi_malloc_usable_size(const void *p)
@@ -227,6 +229,7 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
long mi_option_get(mi_option_t option)
mi_heap_t * mi_heap_get_backing()
Get the backing heap.
void mi_free_aligned(void *p, size_t alignment)
+
void * mi_new(std::size_t n) noexcept(false)
like mi_malloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception...
Delay in milli-seconds before resetting a page (100ms by default)
Definition: mimalloc-doc.h:761
mi_heap_t * mi_heap_new()
Create a new heap that can be used for allocation.
void * mi_heap_malloc(mi_heap_t *heap, size_t size)
Allocate in a specific heap.
diff --git a/docs/modules.html b/docs/modules.html index 0bc6036d..91bf17e8 100644 --- a/docs/modules.html +++ b/docs/modules.html @@ -113,6 +113,7 @@ $(document).ready(function(){initNavTree('modules.html','');});  Heap IntrospectionInspect the heap at runtime  Runtime OptionsSet runtime behavior  Posixmi_ prefixed implementations of various Posix, Unix, and C++ allocation functions + C++ wrappersmi_ prefixed implementations of various allocation functions that use C++ semantics on out-of-memory, generally calling std::get_new_handler and raising a std::bad_alloc exception on failure
diff --git a/docs/modules.js b/docs/modules.js index 47e99b42..b2c2a224 100644 --- a/docs/modules.js +++ b/docs/modules.js @@ -8,5 +8,6 @@ var modules = [ "Typed Macros", "group__typed.html", "group__typed" ], [ "Heap Introspection", "group__analysis.html", "group__analysis" ], [ "Runtime Options", "group__options.html", "group__options" ], - [ "Posix", "group__posix.html", "group__posix" ] + [ "Posix", "group__posix.html", "group__posix" ], + [ "C++ wrappers", "group__cpp.html", "group__cpp" ] ]; \ No newline at end of file diff --git a/docs/navtreeindex0.js b/docs/navtreeindex0.js index e2667728..047d6dbc 100644 --- a/docs/navtreeindex0.js +++ b/docs/navtreeindex0.js @@ -28,6 +28,15 @@ var NAVTREEINDEX0 = "group__analysis.html#gaa862aa8ed8d57d84cae41fc1022d71af":[5,6,4], "group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65":[5,6,1], "group__analysis.html#structmi__heap__area__t":[5,6,0], +"group__cpp.html":[5,9], +"group__cpp.html#ga756f4b2bc6a7ecd0a90baea8e90c7907":[5,9,7], +"group__cpp.html#gaab78a32f55149e9fbf432d5288e38e1e":[5,9,6], +"group__cpp.html#gaad048a9fce3d02c5909cd05c6ec24545":[5,9,1], +"group__cpp.html#gab5e29558926d934c3f1cae8c815f942c":[5,9,3], +"group__cpp.html#gae7bc4f56cd57ed3359060ff4f38bda81":[5,9,4], +"group__cpp.html#gaeaded64eda71ed6b1d569d3e723abc4a":[5,9,5], +"group__cpp.html#gaef2c2bdb4f70857902d3c8903ac095f3":[5,9,2], +"group__cpp.html#structmi__stl__allocator":[5,9,0], "group__extended.html":[5,1], "group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee":[5,1,21], "group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf":[5,1,18], @@ -123,20 +132,15 @@ var NAVTREEINDEX0 = "group__posix.html#ga0d28d5cf61e6bfbb18c63092939fe5c9":[5,8,3], "group__posix.html#ga1326d2e4388630b5f81ca7206318b8e5":[5,8,1], "group__posix.html#ga4531c9e775bb3ae12db57c1ba8a5d7de":[5,8,6], -"group__posix.html#ga48fad8648a2f1dab9c87ea9448a52088":[5,8,16], +"group__posix.html#ga48fad8648a2f1dab9c87ea9448a52088":[5,8,11], 
"group__posix.html#ga705dc7a64bffacfeeb0141501a5c35d7":[5,8,2], "group__posix.html#ga72e9d7ffb5fe94d69bc722c8506e27bc":[5,8,5], -"group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b":[5,8,17], +"group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b":[5,8,12], "group__posix.html#gaab7fa71ea93b96873f5d9883db57d40e":[5,8,8], -"group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545":[5,8,9], -"group__posix.html#gab5e29558926d934c3f1cae8c815f942c":[5,8,11], -"group__posix.html#gacff84f226ba9feb2031b8992e5579447":[5,8,14], +"group__posix.html#gacff84f226ba9feb2031b8992e5579447":[5,8,9], "group__posix.html#gad5a69c8fea96aa2b7a7c818c2130090a":[5,8,0], "group__posix.html#gae01389eedab8d67341ff52e2aad80ebb":[5,8,4], -"group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81":[5,8,12], -"group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a":[5,8,13], -"group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e":[5,8,15], -"group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3":[5,8,10], +"group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e":[5,8,10], "group__typed.html":[5,5], "group__typed.html#ga0619a62c5fd886f1016030abe91f0557":[5,5,7], "group__typed.html#ga1158b49a55dfa81f58a4426a7578f523":[5,5,9], diff --git a/docs/search/all_3.js b/docs/search/all_3.js index af76e9c8..2e08411f 100644 --- a/docs/search/all_3.js +++ b/docs/search/all_3.js @@ -1,4 +1,5 @@ var searchData= [ - ['committed',['committed',['../group__analysis.html#ab47526df656d8837ec3e97f11b83f835',1,'mi_heap_area_t']]] + ['committed',['committed',['../group__analysis.html#ab47526df656d8837ec3e97f11b83f835',1,'mi_heap_area_t']]], + ['c_2b_2b_20wrappers',['C++ wrappers',['../group__cpp.html',1,'']]] ]; diff --git a/docs/search/all_6.js b/docs/search/all_6.js index 7af11c0f..c757cbbf 100644 --- a/docs/search/all_6.js +++ b/docs/search/all_6.js @@ -73,11 +73,13 @@ var searchData= ['mi_5fmallocn',['mi_mallocn',['../group__malloc.html#ga0b05e2bf0f73e7401ae08597ff782ac6',1,'mimalloc-doc.h']]], 
['mi_5fmallocn_5ftp',['mi_mallocn_tp',['../group__typed.html#gae5cb6e0fafc9f23169c5622e077afe8b',1,'mimalloc-doc.h']]], ['mi_5fmemalign',['mi_memalign',['../group__posix.html#gaab7fa71ea93b96873f5d9883db57d40e',1,'mimalloc-doc.h']]], - ['mi_5fnew',['mi_new',['../group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], - ['mi_5fnew_5faligned',['mi_new_aligned',['../group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], - ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__posix.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], - ['mi_5fnew_5fn',['mi_new_n',['../group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81',1,'mimalloc-doc.h']]], - ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], + ['mi_5fnew',['mi_new',['../group__cpp.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], + ['mi_5fnew_5faligned',['mi_new_aligned',['../group__cpp.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], + ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__cpp.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fn',['mi_new_n',['../group__cpp.html#gae7bc4f56cd57ed3359060ff4f38bda81',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__cpp.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], + ['mi_5fnew_5frealloc',['mi_new_realloc',['../group__cpp.html#gaab78a32f55149e9fbf432d5288e38e1e',1,'mimalloc-doc.h']]], + ['mi_5fnew_5freallocn',['mi_new_reallocn',['../group__cpp.html#ga756f4b2bc6a7ecd0a90baea8e90c7907',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit',['mi_option_eager_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca1e8de72c93da7ff22d91e1e27b52ac2b',1,'mimalloc-doc.h']]], 
['mi_5foption_5feager_5fcommit_5fdelay',['mi_option_eager_commit_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fregion_5fcommit',['mi_option_eager_region_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca32ce97ece29f69e82579679cf8a307ad',1,'mimalloc-doc.h']]], @@ -126,6 +128,7 @@ var searchData= ['mi_5fstats_5fmerge',['mi_stats_merge',['../group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1',1,'mimalloc-doc.h']]], ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga2d126e5c62d3badc35445e5d84166df2',1,'mi_stats_print(void *out): mimalloc-doc.h'],['../group__extended.html#ga256cc6f13a142deabbadd954a217e228',1,'mi_stats_print(mi_output_fun *out, void *arg): mimalloc-doc.h']]], ['mi_5fstats_5freset',['mi_stats_reset',['../group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99',1,'mimalloc-doc.h']]], + ['mi_5fstl_5fallocator',['mi_stl_allocator',['../group__cpp.html#structmi__stl__allocator',1,'']]], ['mi_5fstrdup',['mi_strdup',['../group__malloc.html#gac7cffe13f1f458ed16789488bf92b9b2',1,'mimalloc-doc.h']]], ['mi_5fstrndup',['mi_strndup',['../group__malloc.html#gaaabf971c2571891433477e2d21a35266',1,'mimalloc-doc.h']]], ['mi_5fthread_5fdone',['mi_thread_done',['../group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf',1,'mimalloc-doc.h']]], diff --git a/docs/search/classes_0.js b/docs/search/classes_0.js index 4c5482b9..0010dd97 100644 --- a/docs/search/classes_0.js +++ b/docs/search/classes_0.js @@ -1,4 +1,5 @@ var searchData= [ - ['mi_5fheap_5farea_5ft',['mi_heap_area_t',['../group__analysis.html#structmi__heap__area__t',1,'']]] + ['mi_5fheap_5farea_5ft',['mi_heap_area_t',['../group__analysis.html#structmi__heap__area__t',1,'']]], + ['mi_5fstl_5fallocator',['mi_stl_allocator',['../group__cpp.html#structmi__stl__allocator',1,'']]] ]; diff --git a/docs/search/functions_0.js b/docs/search/functions_0.js index 098041bb..6271797a 
100644 --- a/docs/search/functions_0.js +++ b/docs/search/functions_0.js @@ -59,11 +59,13 @@ var searchData= ['mi_5fmalloc_5fusable_5fsize',['mi_malloc_usable_size',['../group__posix.html#ga06d07cf357bbac5c73ba5d0c0c421e17',1,'mimalloc-doc.h']]], ['mi_5fmallocn',['mi_mallocn',['../group__malloc.html#ga0b05e2bf0f73e7401ae08597ff782ac6',1,'mimalloc-doc.h']]], ['mi_5fmemalign',['mi_memalign',['../group__posix.html#gaab7fa71ea93b96873f5d9883db57d40e',1,'mimalloc-doc.h']]], - ['mi_5fnew',['mi_new',['../group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], - ['mi_5fnew_5faligned',['mi_new_aligned',['../group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], - ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__posix.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], - ['mi_5fnew_5fn',['mi_new_n',['../group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81',1,'mimalloc-doc.h']]], - ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], + ['mi_5fnew',['mi_new',['../group__cpp.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], + ['mi_5fnew_5faligned',['mi_new_aligned',['../group__cpp.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], + ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__cpp.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fn',['mi_new_n',['../group__cpp.html#gae7bc4f56cd57ed3359060ff4f38bda81',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__cpp.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], + ['mi_5fnew_5frealloc',['mi_new_realloc',['../group__cpp.html#gaab78a32f55149e9fbf432d5288e38e1e',1,'mimalloc-doc.h']]], + ['mi_5fnew_5freallocn',['mi_new_reallocn',['../group__cpp.html#ga756f4b2bc6a7ecd0a90baea8e90c7907',1,'mimalloc-doc.h']]], 
['mi_5foption_5fenable',['mi_option_enable',['../group__options.html#ga6d45a20a3131f18bc351b69763b38ce4',1,'mimalloc-doc.h']]], ['mi_5foption_5fenable_5fdefault',['mi_option_enable_default',['../group__options.html#ga37988264b915a7db92530cc02d5494cb',1,'mimalloc-doc.h']]], ['mi_5foption_5fenabled',['mi_option_enabled',['../group__options.html#gacebe3f6d91b4a50b54eb84e2a1da1b30',1,'mimalloc-doc.h']]], diff --git a/docs/search/groups_2.js b/docs/search/groups_2.js index 68c73dbe..29185761 100644 --- a/docs/search/groups_2.js +++ b/docs/search/groups_2.js @@ -1,4 +1,4 @@ var searchData= [ - ['extended_20functions',['Extended Functions',['../group__extended.html',1,'']]] + ['c_2b_2b_20wrappers',['C++ wrappers',['../group__cpp.html',1,'']]] ]; diff --git a/docs/search/groups_3.js b/docs/search/groups_3.js index e7e40934..68c73dbe 100644 --- a/docs/search/groups_3.js +++ b/docs/search/groups_3.js @@ -1,5 +1,4 @@ var searchData= [ - ['heap_20introspection',['Heap Introspection',['../group__analysis.html',1,'']]], - ['heap_20allocation',['Heap Allocation',['../group__heap.html',1,'']]] + ['extended_20functions',['Extended Functions',['../group__extended.html',1,'']]] ]; diff --git a/docs/search/groups_4.js b/docs/search/groups_4.js index 4f005682..e7e40934 100644 --- a/docs/search/groups_4.js +++ b/docs/search/groups_4.js @@ -1,4 +1,5 @@ var searchData= [ - ['posix',['Posix',['../group__posix.html',1,'']]] + ['heap_20introspection',['Heap Introspection',['../group__analysis.html',1,'']]], + ['heap_20allocation',['Heap Allocation',['../group__heap.html',1,'']]] ]; diff --git a/docs/search/groups_5.js b/docs/search/groups_5.js index 2533cb94..4f005682 100644 --- a/docs/search/groups_5.js +++ b/docs/search/groups_5.js @@ -1,4 +1,4 @@ var searchData= [ - ['runtime_20options',['Runtime Options',['../group__options.html',1,'']]] + ['posix',['Posix',['../group__posix.html',1,'']]] ]; diff --git a/docs/search/groups_6.js b/docs/search/groups_6.js index 647887f5..2533cb94 100644 
--- a/docs/search/groups_6.js +++ b/docs/search/groups_6.js @@ -1,4 +1,4 @@ var searchData= [ - ['typed_20macros',['Typed Macros',['../group__typed.html',1,'']]] + ['runtime_20options',['Runtime Options',['../group__options.html',1,'']]] ]; diff --git a/docs/search/groups_7.js b/docs/search/groups_7.js index 2b9b4cea..647887f5 100644 --- a/docs/search/groups_7.js +++ b/docs/search/groups_7.js @@ -1,4 +1,4 @@ var searchData= [ - ['zero_20initialized_20re_2dallocation',['Zero initialized re-allocation',['../group__zeroinit.html',1,'']]] + ['typed_20macros',['Typed Macros',['../group__typed.html',1,'']]] ]; diff --git a/docs/search/groups_8.html b/docs/search/groups_8.html new file mode 100644 index 00000000..81ac9508 --- /dev/null +++ b/docs/search/groups_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/groups_8.js b/docs/search/groups_8.js new file mode 100644 index 00000000..2b9b4cea --- /dev/null +++ b/docs/search/groups_8.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['zero_20initialized_20re_2dallocation',['Zero initialized re-allocation',['../group__zeroinit.html',1,'']]] +]; diff --git a/docs/search/searchdata.js b/docs/search/searchdata.js index 919719e9..dd31068e 100644 --- a/docs/search/searchdata.js +++ b/docs/search/searchdata.js @@ -7,7 +7,7 @@ var indexSectionsWithContent = 4: "m", 5: "m", 6: "_m", - 7: "abehprtz", + 7: "abcehprtz", 8: "beopu" }; diff --git a/docs/using.html b/docs/using.html index eae37a5e..c5dc12e7 100644 --- a/docs/using.html +++ b/docs/using.html @@ -106,7 +106,7 @@ $(document).ready(function(){initNavTree('using.html','');});

The preferred usage is including <mimalloc.h>, linking with the shared- or static library, and using the mi_malloc API exclusively for allocation. For example,

gcc -o myprogram -lmimalloc myfile.c

mimalloc uses only safe OS calls (mmap and VirtualAlloc) and can co-exist with other allocators linked to the same program. If you use cmake, you can simply use:

find_package(mimalloc 1.0 REQUIRED)

in your CMakeLists.txt to find a locally installed mimalloc. Then use either:

target_link_libraries(myapp PUBLIC mimalloc)

to link with the shared (dynamic) library, or:

target_link_libraries(myapp PUBLIC mimalloc-static)

to link with the static library. See test\CMakeLists.txt for an example.

C++

For best performance in C++ programs, it is also recommended to override the global new and delete operators. For convenience, mimalloc provides mimalloc-new-delete.h which does this for you – just include it in a single(!) source file in your project.

-

In C++, mimalloc also provides the mi_stl_allocator struct which implements the std::allocator interface. For example:

std::vector<some_struct, mi_stl_allocator<some_struct>> vec;
vec.push_back(some_struct());

Statistics

+

In C++, mimalloc also provides the mi_stl_allocator struct which implements the std::allocator interface. For example:

std::vector<some_struct, mi_stl_allocator<some_struct>> vec;
vec.push_back(some_struct());

Statistics

You can pass environment variables to print verbose messages (MIMALLOC_VERBOSE=1) and statistics (MIMALLOC_SHOW_STATS=1) (in the debug version):

> env MIMALLOC_SHOW_STATS=1 ./cfrac 175451865205073170563711388363
175451865205073170563711388363 = 374456281610909315237213 * 468551
heap stats: peak total freed unit
normal 2: 16.4 kb 17.5 mb 17.5 mb 16 b ok
normal 3: 16.3 kb 15.2 mb 15.2 mb 24 b ok
normal 4: 64 b 4.6 kb 4.6 kb 32 b ok
normal 5: 80 b 118.4 kb 118.4 kb 40 b ok
normal 6: 48 b 48 b 48 b 48 b ok
normal 17: 960 b 960 b 960 b 320 b ok
heap stats: peak total freed unit
normal: 33.9 kb 32.8 mb 32.8 mb 1 b ok
huge: 0 b 0 b 0 b 1 b ok
total: 33.9 kb 32.8 mb 32.8 mb 1 b ok
malloc requested: 32.8 mb
committed: 58.2 kb 58.2 kb 58.2 kb 1 b ok
reserved: 2.0 mb 2.0 mb 2.0 mb 1 b ok
reset: 0 b 0 b 0 b 1 b ok
segments: 1 1 1
-abandoned: 0
pages: 6 6 6
-abandoned: 0
mmaps: 3
mmap fast: 0
mmap slow: 1
threads: 0
elapsed: 2.022s
process: user: 1.781s, system: 0.016s, faults: 756, reclaims: 0, rss: 2.7 mb

The above model of using the mi_ prefixed API is not always possible though in existing programs that already use the standard malloc interface, and another option is to override the standard malloc interface completely and redirect all calls to the mimalloc library instead.

See Overriding Malloc for more info.

diff --git a/test/main-override.cpp b/test/main-override.cpp index d082ade3..fcf3970f 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -71,16 +71,16 @@ static Static s = Static(); bool test_stl_allocator1() { - std::vector> vec; + std::vector > vec; vec.push_back(1); vec.pop_back(); return vec.size() == 0; } -bool test_stl_allocator2() { - struct some_struct { int i; int j; double z; }; +struct some_struct { int i; int j; double z; }; - std::vector> vec; +bool test_stl_allocator2() { + std::vector > vec; vec.push_back(some_struct()); vec.pop_back(); return vec.size() == 0; From af2cfe255a9e4e3eb27f8ad4b13a64ebc441fde6 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 22 Jan 2020 15:05:02 -0800 Subject: [PATCH 202/293] add updated benchmarks --- readme.md | 208 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 117 insertions(+), 91 deletions(-) diff --git a/readme.md b/readme.md index b6258cfc..c5c71ac4 100644 --- a/readme.md +++ b/readme.md @@ -313,68 +313,71 @@ under your control or otherwise mixing of pointers from different heaps may occu # Performance +Last update: 2020-01-20 + We tested _mimalloc_ against many other top allocators over a wide range of benchmarks, ranging from various real world programs to synthetic benchmarks that see how the allocator behaves under more -extreme circumstances. +extreme circumstances. In our benchmark suite, _mimalloc_ outperforms other leading +allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), and has a similar memory footprint. A nice property is that it +does consistently well over the wide range of benchmarks. -In our benchmarks, _mimalloc_ always outperforms all other leading -allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), and usually uses less -memory (up to 25% more in the worst case). A nice property is that it -does *consistently* well over the wide range of benchmarks. 
- -Allocators are interesting as there exists no algorithm that is generally +General memory allocators are interesting as there exists no algorithm that is optimal -- for a given allocator one can usually construct a workload where it does not do so well. The goal is thus to find an allocation strategy that performs well over a wide range of benchmarks without -suffering from underperformance in less common situations (which is what -the second half of our benchmark set tests for). +suffering from (too much) underperformance in less common situations. -We show here only the results on an AMD EPYC system (Apr 2019) -- for -specific details and further benchmarks we refer to the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action). +As always, interpret these results with care since some benchmarks test synthetic +or uncommon situations that may never apply to your workloads. For example, most +allocators do not do well on `xmalloc-testN` but that includes the best +industrial allocators like _jemalloc_ and _tcmalloc_ that are used in some of +the world's largest systems (like Chrome or FreeBSD). -The benchmark suite is scripted and available separately +We show here only an overview -- for +more specific details and further benchmarks we refer to the +[technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action). +The benchmark suite is automated and available separately as [mimalloc-bench](https://github.com/daanx/mimalloc-bench). -## Benchmark Results +## Benchmark Results on 36-core Intel -Testing on a big Amazon EC2 instance ([r5a.4xlarge](https://aws.amazon.com/ec2/instance-types/)) -consisting of a 16-core AMD EPYC 7000 at 2.5GHz -with 128GB ECC memory, running Ubuntu 18.04.1 with LibC 2.27 and GCC 7.3.0. 
-The measured allocators are _mimalloc_ (mi), -Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (tc) used in Chrome, -[_jemalloc_](https://github.com/jemalloc/jemalloc) (je) by Jason Evans used in Firefox and FreeBSD, -[_snmalloc_](https://github.com/microsoft/snmalloc) (sn) by Liétar et al. \[8], [_rpmalloc_](https://github.com/rampantpixels/rpmalloc) (rp) by Mattias Jansson at Rampant Pixels, -[_Hoard_](https://github.com/emeryberger/Hoard) by Emery Berger \[1], -the system allocator (glibc) (based on _PtMalloc2_), and the Intel thread -building blocks [allocator](https://github.com/intel/tbb) (tbb). +Testing on a big Amazon EC2 compute instance +([c5.18xlarge](https://aws.amazon.com/ec2/instance-types/#Compute_Optimized)) +consisting of a 72 processor Intel Xeon at 3GHz +with 144GiB ECC memory, running Ubuntu 18.04.1 with LibC 2.27 and GCC 7.4.0. +The measured allocators are _mimalloc_ (mi, tag:v1.4.0, page reset enabled) +and its secure build as _smi_, +Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (tc, tag:gperftools-2.7) used in Chrome, +Facebook's [_jemalloc_](https://github.com/jemalloc/jemalloc) (je, tag:5.2.1) by Jason Evans used in Firefox and FreeBSD, +the Intel thread building blocks [allocator](https://github.com/intel/tbb) (tbb, tag:2020), +[rpmalloc](https://github.com/mjansson/rpmalloc) (rp,tag:1.4.0) by Mattias Jansson, +the original scalable [_Hoard_](https://github.com/emeryberger/Hoard) (tag:3.13) allocator by Emery Berger \[1], +the memory compacting [_Mesh_](https://github.com/plasma-umass/Mesh) (git:51222e7) allocator by +Bobby Powers _et al_ \[8], +and finally the default system allocator (glibc, 2.7.0) (based on _PtMalloc2_). -![bench-r5a-1](doc/bench-r5a-1.svg) -![bench-r5a-2](doc/bench-r5a-2.svg) +![bench-c5-18xlarge-a](doc/bench-c5-18xlarge-2020-01-20-a.svg) +![bench-c5-18xlarge-b](doc/bench-c5-18xlarge-2020-01-20-b.svg) -Memory usage: +Any benchmarks ending in `N` run on all processors in parallel.
+Results are averaged over 10 runs and reported relative +to mimalloc (where 1.2 means it took 1.2× longer to run). +The legend also contains the _overall relative score_ between the +allocators where 100 points is the maximum if an allocator is fastest on +all benchmarks. -![bench-r5a-rss-1](doc/bench-r5a-rss-1.svg) -![bench-r5a-rss-1](doc/bench-r5a-rss-2.svg) +The single threaded _cfrac_ benchmark by Dave Barrett is an implementation of +continued fraction factorization which uses many small short-lived allocations. +All allocators do well on such common usage, where _mimalloc_ is just a tad +faster than _tcmalloc_ and +_jemalloc_. -(note: the _xmalloc-testN_ memory usage should be disregarded as it -allocates more the faster the program runs). - -In the first five benchmarks we can see _mimalloc_ outperforms the other -allocators moderately, but we also see that all these modern allocators -perform well -- the times of large performance differences in regular -workloads are over :-). -In _cfrac_ and _espresso_, _mimalloc_ is a tad faster than _tcmalloc_ and -_jemalloc_, but a solid 10\% faster than all other allocators on -_espresso_. The _tbb_ allocator does not do so well here and lags more than -20\% behind _mimalloc_. The _cfrac_ and _espresso_ programs do not use much -memory (~1.5MB) so it does not matter too much, but still _mimalloc_ uses -about half the resident memory of _tcmalloc_. - -The _leanN_ program is most interesting as a large realistic and -concurrent workload of the [Lean](https://github.com/leanprover/lean) theorem prover -compiling its own standard library, and there is a 8% speedup over _tcmalloc_. This is +The _leanN_ program is interesting as a large realistic and +concurrent workload of the [Lean](https://github.com/leanprover/lean) +theorem prover compiling its own standard library, and there is a 7% +speedup over _tcmalloc_. 
This is quite significant: if Lean spends 20% of its time in the allocator that means that _mimalloc_ is 1.3× faster than _tcmalloc_ here. (This is surprising as that is not measured in a pure @@ -383,19 +386,23 @@ outsized improvement here because _mimalloc_ has better locality in the allocation which improves performance for the *other* computations in a program as well). -The _redis_ benchmark shows more differences between the allocators where -_mimalloc_ is 14\% faster than _jemalloc_. On this benchmark _tbb_ (and _Hoard_) do -not do well and are over 40\% slower. +The single threaded _redis_ benchmark again shows that most allocators do well on such workloads where _tcmalloc_ +did best this time. -The _larson_ server workload allocates and frees objects between -many threads. Larson and Krishnan \[2] observe this -behavior (which they call _bleeding_) in actual server applications, and the -benchmark simulates this. -Here, _mimalloc_ is more than 2.5× faster than _tcmalloc_ and _jemalloc_ -due to the object migration between different threads. This is a difficult -benchmark for other allocators too where _mimalloc_ is still 48% faster than the next -fastest (_snmalloc_). +The _larsonN_ server benchmark by Larson and Krishnan \[2] allocates and frees between threads. They observed this +behavior (which they call _bleeding_) in actual server applications, and the benchmark simulates this. +Here, _mimalloc_ is quite a bit faster than _tcmalloc_ and _jemalloc_ probably due to the object migration between different threads. +The _mstressN_ workload performs many allocations and re-allocations, +and migrates objects between threads (as in _larsonN_). However, it also +creates and destroys the _N_ worker threads a few times keeping some objects +alive beyond the life time of the allocating thread. We observed this +behavior in many larger server applications.
+ +The [_rptestN_](https://github.com/mjansson/rpmalloc-benchmark) benchmark +by Mattias Jansson is an allocator test originally designed +for _rpmalloc_, and tries to simulate realistic allocation patterns over +multiple threads. Here the differences between allocators become more apparent. The second benchmark set tests specific aspects of the allocators and shows even more extreme differences between them. @@ -404,46 +411,62 @@ The _alloc-test_, by [OLogN Technologies AG](http://ithare.com/testing-memory-allocators-ptmalloc2-tcmalloc-hoard-jemalloc-while-trying-to-simulate-real-world-loads/), is a very allocation intensive benchmark doing millions of allocations in various size classes. The test is scaled such that when an allocator performs almost identically on _alloc-test1_ as _alloc-testN_ it -means that it scales linearly. Here, _tcmalloc_, _snmalloc_, and -_Hoard_ seem to scale less well and do more than 10% worse on the -multi-core version. Even the best allocators (_tcmalloc_ and _jemalloc_) are -more than 10% slower as _mimalloc_ here. +means that it scales linearly. Here, _tcmalloc_, and +_Hoard_ seem to scale less well and do more than 10% worse on the multi-core version. Even the best industrial +allocators (_tcmalloc_, _jemalloc_, and _tbb_) are more than 10% slower than _mimalloc_ here. The _sh6bench_ and _sh8bench_ benchmarks are developed by [MicroQuill](http://www.microquill.com/) as part of SmartHeap. In _sh6bench_ _mimalloc_ does much -better than the others (more than 2× faster than _jemalloc_). +better than the others (more than 1.5× faster than _jemalloc_). We cannot explain this well but believe it is caused in part by the "reverse" free-ing pattern in _sh6bench_. -Again in _sh8bench_ the _mimalloc_ allocator handles object migration -between threads much better and is over 36% faster than the next best -allocator, _snmalloc_.
Whereas _tcmalloc_ did well on _sh6bench_, the -addition of object migration caused it to be almost 3 times slower -than before. +The _sh8bench_ is a variation with object migration +between threads; whereas _tcmalloc_ did well on _sh6bench_, the addition of object migration causes it to be 10× slower than before. -The _xmalloc-testN_ benchmark by Lever and Boreham \[5] and Christian Eder, -simulates an asymmetric workload where -some threads only allocate, and others only free. The _snmalloc_ -allocator was especially developed to handle this case well as it -often occurs in concurrent message passing systems (like the [Pony] language -for which _snmalloc_ was initially developed). Here we see that +The _xmalloc-testN_ benchmark by Lever and Boreham \[5] and Christian Eder, simulates an asymmetric workload where +some threads only allocate, and others only free -- they observed this pattern in +larger server applications. Here we see that the _mimalloc_ technique of having non-contended sharded thread free -lists pays off as it even outperforms _snmalloc_ here. -Only _jemalloc_ also handles this reasonably well, while the -others underperform by a large margin. +lists pays off as it outperforms others by a very large margin. Only _rpmalloc_ and _tbb_ also scale well on this benchmark. -The _cache-scratch_ benchmark by Emery Berger \[1], and introduced with the Hoard -allocator to test for _passive-false_ sharing of cache lines. With a single thread they all +The _cache-scratch_ benchmark by Emery Berger \[1], and introduced with +the Hoard allocator to test for _passive-false_ sharing of cache lines. +With a single thread they all perform the same, but when running with multiple threads the potential allocator -induced false sharing of the cache lines causes large run-time -differences, where _mimalloc_ is more than 18× faster than _jemalloc_ and -_tcmalloc_! 
Crundal \[6] describes in detail why the false cache line -sharing occurs in the _tcmalloc_ design, and also discusses how this +induced false sharing of the cache lines can cause large run-time differences. +Crundal \[6] describes in detail why the false cache line sharing occurs in the _tcmalloc_ design, and also discusses how this can be avoided with some small implementation changes. -Only _snmalloc_ and _tbb_ also avoid the -cache line sharing like _mimalloc_. Kukanov and Voss \[7] describe in detail +Only the _tbb_, _rpmalloc_ and _mesh_ allocators also avoid the +cache line sharing completely, while _Hoard_ and _glibc_ seem to mitigate +the effects. Kukanov and Voss \[7] describe in detail how the design of _tbb_ avoids the false cache line sharing. +## On 24-core AMD Epyc + +For completeness, here are the results on a +[r5a.12xlarge](https://aws.amazon.com/ec2/instance-types/#Memory_Optimized) instance +having a 48 processor AMD Epyc 7000 at 2.5GHz with 384GiB of memory. +The results are similar to the Intel results but it is interesting to +see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks. + +![bench-r5a-12xlarge-a](doc/bench-r5a-12xlarge-2020-01-16-a.svg) +![bench-r5a-12xlarge-b](doc/bench-r5a-12xlarge-2020-01-16-b.svg) + + +## Peak Working Set + +The following figure shows the peak working set (rss) of the allocators +on the benchmarks (on the c5.18xlarge instance). + +![bench-c5-18xlarge-rss-a](doc/bench-c5-18xlarge-2020-01-20-rss-a.svg) +![bench-c5-18xlarge-rss-b](doc/bench-c5-18xlarge-2020-01-20-rss-b.svg) + +Note that the _xmalloc-testN_ memory usage should be disregarded as it +allocates more the faster the program runs. Similarly, memory usage of +_mstressN_, _rptestN_ and _sh8bench_ can vary depending on scheduling and +speed. Nevertheless, even though _mimalloc_ is fast on these benchmarks we +believe the memory usage is too high and hope to improve.
# References @@ -453,14 +476,12 @@ how the design of _tbb_ avoids the false cache line sharing. the Ninth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS-IX). Cambridge, MA, November 2000. [pdf](http://www.cs.utexas.edu/users/mckinley/papers/asplos-2000.pdf) - -- \[2] P. Larson and M. Krishnan. _Memory allocation for long-running server applications_. In ISMM, Vancouver, B.C., Canada, 1998. - [pdf](http://citeseer.ist.psu.edu/viewdoc/download;jsessionid=5F0BFB4F57832AEB6C11BF8257271088?doi=10.1.1.45.1947&rep=rep1&type=pdf) +- \[2] P. Larson and M. Krishnan. _Memory allocation for long-running server applications_. + In ISMM, Vancouver, B.C., Canada, 1998. [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.45.1947&rep=rep1&type=pdf) - \[3] D. Grunwald, B. Zorn, and R. Henderson. _Improving the cache locality of memory allocation_. In R. Cartwright, editor, - Proceedings of the Conference on Programming Language Design and Implementation, pages 177–186, New York, NY, USA, June 1993. - [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.43.6621&rep=rep1&type=pdf) + Proceedings of the Conference on Programming Language Design and Implementation, pages 177–186, New York, NY, USA, June 1993. [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.43.6621&rep=rep1&type=pdf) - \[4] J. Barnes and P. Hut. _A hierarchical O(n*log(n)) force-calculation algorithm_. Nature, 324:446-449, 1986. @@ -468,17 +489,22 @@ how the design of _tbb_ avoids the false cache line sharing. In USENIX Annual Technical Conference, Freenix Session. San Diego, CA. Jun. 2000. Available at -- \[6] Timothy Crundal. _Reducing Active-False Sharing in TCMalloc._ - 2016. . CS16S1 project at the Australian National University. +- \[6] Timothy Crundal. _Reducing Active-False Sharing in TCMalloc_. 2016. CS16S1 project at the Australian National University. 
[pdf](http://courses.cecs.anu.edu.au/courses/CSPROJECTS/16S1/Reports/Timothy_Crundal_Report.pdf) - \[7] Alexey Kukanov, and Michael J Voss. _The Foundations for Scalable Multi-Core Software in Intel Threading Building Blocks._ Intel Technology Journal 11 (4). 2007 -- \[8] Paul Liétar, Theodore Butler, Sylvan Clebsch, Sophia Drossopoulou, Juliana Franco, Matthew J Parkinson, +- \[8] Bobby Powers, David Tench, Emery D. Berger, and Andrew McGregor. + _Mesh: Compacting Memory Management for C/C++_ + In Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI'19), June 2019, pages 333-–346. + + # Contributing From c2c56e29c7dfa757d49bc7824fa4727657782e8b Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 22 Jan 2020 15:06:44 -0800 Subject: [PATCH 203/293] add benchmark charts --- doc/bench-c5-18xlarge-2020-01-20-a.svg | 886 +++++++++++++++ doc/bench-c5-18xlarge-2020-01-20-b.svg | 1184 ++++++++++++++++++++ doc/bench-c5-18xlarge-2020-01-20-rss-a.svg | 756 +++++++++++++ doc/bench-c5-18xlarge-2020-01-20-rss-b.svg | 1027 +++++++++++++++++ doc/bench-r5a-12xlarge-2020-01-16-a.svg | 867 ++++++++++++++ doc/bench-r5a-12xlarge-2020-01-16-b.svg | 1156 +++++++++++++++++++ 6 files changed, 5876 insertions(+) create mode 100644 doc/bench-c5-18xlarge-2020-01-20-a.svg create mode 100644 doc/bench-c5-18xlarge-2020-01-20-b.svg create mode 100644 doc/bench-c5-18xlarge-2020-01-20-rss-a.svg create mode 100644 doc/bench-c5-18xlarge-2020-01-20-rss-b.svg create mode 100644 doc/bench-r5a-12xlarge-2020-01-16-a.svg create mode 100644 doc/bench-r5a-12xlarge-2020-01-16-b.svg diff --git a/doc/bench-c5-18xlarge-2020-01-20-a.svg b/doc/bench-c5-18xlarge-2020-01-20-a.svg new file mode 100644 index 00000000..0e550935 --- /dev/null +++ b/doc/bench-c5-18xlarge-2020-01-20-a.svgo newline at end of file diff --git a/doc/bench-c5-18xlarge-2020-01-20-b.svg b/doc/bench-c5-18xlarge-2020-01-20-b.svg new file mode 100644 index 00000000..22bfa5c2 --- /dev/null +++ 
b/doc/bench-c5-18xlarge-2020-01-20-b.svgo newline at end of file diff --git a/doc/bench-c5-18xlarge-2020-01-20-rss-a.svg b/doc/bench-c5-18xlarge-2020-01-20-rss-a.svg new file mode 100644 index 00000000..6b15ebe5 --- /dev/null +++ b/doc/bench-c5-18xlarge-2020-01-20-rss-a.svgo newline at end of file diff --git a/doc/bench-c5-18xlarge-2020-01-20-rss-b.svg b/doc/bench-c5-18xlarge-2020-01-20-rss-b.svg new file mode 100644 index 00000000..e3eb774c --- /dev/null +++ b/doc/bench-c5-18xlarge-2020-01-20-rss-b.svgo newline at end of file diff --git a/doc/bench-r5a-12xlarge-2020-01-16-a.svg b/doc/bench-r5a-12xlarge-2020-01-16-a.svg new file mode 100644 index 00000000..b110ff47 --- /dev/null +++ b/doc/bench-r5a-12xlarge-2020-01-16-a.svgo newline at end of file diff --git a/doc/bench-r5a-12xlarge-2020-01-16-b.svg b/doc/bench-r5a-12xlarge-2020-01-16-b.svg new file mode 100644 index 00000000..f7a3287e --- /dev/null +++ b/doc/bench-r5a-12xlarge-2020-01-16-b.svgo newline at end of file From 6f7d98d6698939f49a6ba1b13fab87068df6435e Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 22 Jan 2020 15:08:18 -0800 Subject: [PATCH 204/293] fix benchmark chart links --- readme.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/readme.md b/readme.md index c5c71ac4..388e6470 100644 --- a/readme.md +++ b/readme.md @@ -358,8 +358,8 @@ the memory compacting [_Mesh_](https://github.com/plasma-umass/Mesh) (git:51222e Bobby Powers _et al_ \[8], and finally the default system allocator (glibc, 2.7.0) (based on _PtMalloc2_). -![bench-c5-18xlarge-a](doc/bench-c5-18xlarge-2020-01-20-a.svq) -![bench-c5-18xlarge-b](doc/bench-c5-18xlarge-2020-01-20-b.svq) +![bench-c5-18xlarge-a](doc/bench-c5-18xlarge-2020-01-20-a.svg) +![bench-c5-18xlarge-b](doc/bench-c5-18xlarge-2020-01-20-b.svg) Any benchmarks ending in `N` run on all processors in parallel. 
Results are averaged over 10 runs and reported relative @@ -450,8 +450,8 @@ having a 48 processor AMD Epyc 7000 at 2.5GHz with 384GiB of memory. The results are similar to the Intel results but it is interesting to see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks. -![bench-r5a-12xlarge-a](doc/bench-r5a-12xlarge-2020-01-16-a.svq) -![bench-r5a-12xlarge-b](doc/bench-r5a-12xlarge-2020-01-16-b.svq) +![bench-r5a-12xlarge-a](doc/bench-r5a-12xlarge-2020-01-16-a.svg) +![bench-r5a-12xlarge-b](doc/bench-r5a-12xlarge-2020-01-16-b.svg) ## Peak Working Set @@ -459,8 +459,8 @@ see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks The following figure shows the peak working set (rss) of the allocators on the benchmarks (on the c5.18xlarge instance). -![bench-c5-18xlarge-rss-a](doc/bench-c5-18xlarge-2020-01-20-rss-a.svq) -![bench-c5-18xlarge-rss-b](doc/bench-c5-18xlarge-2020-01-20-rss-b.svq) +![bench-c5-18xlarge-rss-a](doc/bench-c5-18xlarge-2020-01-20-rss-a.svg) +![bench-c5-18xlarge-rss-b](doc/bench-c5-18xlarge-2020-01-20-rss-b.svg) Note that the _xmalloc-testN_ memory usage should be disregarded as it allocates more the faster the program runs. Similarly, memory usage of From 433598296a7e154436eabd613968d7f1ea7cd18d Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 22 Jan 2020 15:21:54 -0800 Subject: [PATCH 205/293] Fix benchmark chart display --- readme.md | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/readme.md b/readme.md index 388e6470..db58df30 100644 --- a/readme.md +++ b/readme.md @@ -56,8 +56,8 @@ Enjoy! ### Releases -* 2020-01-XX, `v1.4.0`: stable release 1.4: delayed OS page reset for (much) better performance - with page reset enabled, more eager concurrent free, addition of STL allocator. 
+* 2020-01-22, `v1.4.0`: stable release 1.4: delayed OS page reset with (much) better performance + (when page reset is enabled), more eager concurrent free, addition of STL allocator, fixed potential memory leak. * 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode. * 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. @@ -208,14 +208,17 @@ or via environment variables. to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead when possible). -- `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions + - `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at startup and can give quite a performance improvement on long running workloads. Usually it is better to not use `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving - contiguous physical memory can take a long time when memory is fragmented. + contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at + startup only once). Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). 
With huge OS pages, it may be beneficial to set the setting `MIMALLOC_EAGER_COMMIT_DELAY=N` (with usually `N` as 1) to delay the initial `N` segments of a thread to not allocate in the huge OS pages; this prevents threads that are short lived @@ -358,8 +361,8 @@ the memory compacting [_Mesh_](https://github.com/plasma-umass/Mesh) (git:51222e Bobby Powers _et al_ \[8], and finally the default system allocator (glibc, 2.7.0) (based on _PtMalloc2_). -![bench-c5-18xlarge-a](doc/bench-c5-18xlarge-2020-01-20-a.svg) -![bench-c5-18xlarge-b](doc/bench-c5-18xlarge-2020-01-20-b.svg) + + Any benchmarks ending in `N` run on all processors in parallel. Results are averaged over 10 runs and reported relative @@ -450,8 +453,8 @@ having a 48 processor AMD Epyc 7000 at 2.5GHz with 384GiB of memory. The results are similar to the Intel results but it is interesting to see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks. -![bench-r5a-12xlarge-a](doc/bench-r5a-12xlarge-2020-01-16-a.svg) -![bench-r5a-12xlarge-b](doc/bench-r5a-12xlarge-2020-01-16-b.svg) + + ## Peak Working Set @@ -459,8 +462,8 @@ see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks The following figure shows the peak working set (rss) of the allocators on the benchmarks (on the c5.18xlarge instance). -![bench-c5-18xlarge-rss-a](doc/bench-c5-18xlarge-2020-01-20-rss-a.svg) -![bench-c5-18xlarge-rss-b](doc/bench-c5-18xlarge-2020-01-20-rss-b.svg) + + Note that the _xmalloc-testN_ memory usage should be disregarded as it allocates more the faster the program runs. 
Similarly, memory usage of From b7aef989e89aa624bdd9ba487b7df45334568e64 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 22 Jan 2020 15:27:47 -0800 Subject: [PATCH 206/293] Update readme.md --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index db58df30..1dc16c02 100644 --- a/readme.md +++ b/readme.md @@ -350,7 +350,7 @@ Testing on a big Amazon EC2 compute instance ([c5.18xlarge](https://aws.amazon.com/ec2/instance-types/#Compute_Optimized)) consisting of a 72 processor Intel Xeon at 3GHz with 144GiB ECC memory, running Ubuntu 18.04.1 with LibC 2.27 and GCC 7.4.0. -The measured allocators are _mimalloc_ (mi, tag:v1.4.0, page reset enabled) +The measured allocators are _mimalloc_ (xmi, tag:v1.4.0, page reset enabled) and its secure build as _smi_, Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (tc, tag:gperftools-2.7) used in Chrome, Facebook's [_jemalloc_](https://github.com/jemalloc/jemalloc) (je, tag:5.2.1) by Jason Evans used in Firefox and FreeBSD, From 6a744a8549263696ef8d620006a0de2249e59b46 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 22 Jan 2020 18:16:40 -0800 Subject: [PATCH 207/293] Update readme.md --- readme.md | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/readme.md b/readme.md index 1dc16c02..baac2a93 100644 --- a/readme.md +++ b/readme.md @@ -10,15 +10,15 @@ mimalloc (pronounced "me-malloc") is a general purpose allocator with excellent [performance](#performance) characteristics. Initially developed by Daan Leijen for the run-time systems of the -[Koka](https://github.com/koka-lang/koka) and [Lean](https://github.com/leanprover/lean) languages. +[Koka](https://github.com/koka-lang/koka) and [Lean](https://github.com/leanprover/lean) languages. +Latest release:`v1.4.0` (2020-01-22). 
It is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: ``` > LD_PRELOAD=/usr/bin/libmimalloc.so myprogram ``` - -Notable aspects of the design include: +It also has an easy way to override the allocator in [Windows](#override_on_windows). Notable aspects of the design include: - __small and consistent__: the library is about 6k LOC using simple and consistent data structures. This makes it very suitable @@ -45,9 +45,10 @@ Notable aspects of the design include: times (_wcat_), bounded space overhead (~0.2% meta-data, with at most 12.5% waste in allocation sizes), and has no internal points of contention using only atomic operations. - __fast__: In our benchmarks (see [below](#performance)), - _mimalloc_ always outperforms all other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), + _mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), and usually uses less memory (up to 25% more in the worst case). A nice property - is that it does consistently well over a wide range of benchmarks. + is that it does consistently well over a wide range of benchmarks. There is also good huge OS page + support for larger server programs. The [documentation](https://microsoft.github.io/mimalloc) gives a full overview of the API. You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results. @@ -56,8 +57,8 @@ Enjoy! ### Releases -* 2020-01-22, `v1.4.0`: stable release 1.4: delayed OS page reset with (much) better performance - (when page reset is enabled), more eager concurrent free, addition of STL allocator, fixed potential memory leak. 
+* 2020-01-22, `v1.4.0`: stable release 1.4: improved performance for delayed OS page reset, +more eager concurrent free, addition of STL allocator, fixed potential memory leak. * 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode. * 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. @@ -189,7 +190,7 @@ malloc requested: 32.8 mb The above model of using the `mi_` prefixed API is not always possible though in existing programs that already use the standard malloc interface, and another option is to override the standard malloc interface -completely and redirect all calls to the _mimalloc_ library instead. +completely and redirect all calls to the _mimalloc_ library instead . ## Environment Options @@ -215,7 +216,7 @@ or via environment variables. real drawbacks and may improve performance by a little. --> - `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at - startup and can give quite a performance improvement on long running workloads. Usually it is better to not use + startup and can give quite a (latency) performance improvement on long running workloads. Usually it is better to not use `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at startup only once). @@ -236,7 +237,7 @@ Overriding the standard `malloc` can be done either _dynamically_ or _statically This is the recommended way to override the standard malloc interface. 
-### Linux, BSD +### Override on Linux, BSD On these ELF-based systems we preload the mimalloc shared library so all calls to the standard `malloc` interface are @@ -255,7 +256,7 @@ or run with the debug version to get detailed statistics: > env MIMALLOC_SHOW_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram ``` -### MacOS +### Override on MacOS On macOS we can also preload the mimalloc shared library so all calls to the standard `malloc` interface are @@ -270,9 +271,9 @@ the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-i Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is actively worked on to fix this (see issue [`#50`](https://github.com/microsoft/mimalloc/issues/50)). -### Windows +### Override on Windows -Overriding on Windows is robust but requires that you link your program explicitly with +Overriding on Windows is robust but requires that you link your program explicitly with the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). @@ -283,7 +284,7 @@ To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the `main` function, like `mi_version()` (or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project for an example on how to use this. For best performance on Windows with C++, it -is highly recommended to also override the `new`/`delete` operations (by including +is also recommended to override the `new`/`delete` operations (by including [`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) a single(!) source file in your project). 
The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic From 0f14f431c55405b4da992ee3ac54da3726184851 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 22 Jan 2020 18:33:07 -0800 Subject: [PATCH 208/293] bump version to 1.5 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 20 ++++++++++---------- test/CMakeLists.txt | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 0a982bdf..5137be80 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,5 +1,5 @@ set(mi_version_major 1) -set(mi_version_minor 4) +set(mi_version_minor 5) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 3861ad4f..1c77d462 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 140 // major + 2 digits minor +#define MI_MALLOC_VERSION 150 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes @@ -368,9 +368,9 @@ mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_a #endif template struct mi_stl_allocator { - typedef T value_type; + typedef T value_type; typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; + typedef std::ptrdiff_t difference_type; typedef value_type& reference; typedef value_type const& const_reference; typedef value_type* pointer; @@ -383,23 +383,23 @@ template struct mi_stl_allocator { mi_stl_allocator select_on_container_copy_construction() const { return *this; } void deallocate(T* p, size_type) { mi_free(p); } - #if (__cplusplus >= 201703L) // C++17 + #if (__cplusplus >= 201703L) // C++17 T* allocate(size_type count) { return static_cast(mi_new_n(count, sizeof(T))); } - T* allocate(size_type count, const void*) { return allocate(count); } - #else + T* allocate(size_type count, const void*) { return allocate(count); } + #else pointer allocate(size_type count, const void* = 0) { return static_cast(mi_new_n(count, sizeof(value_type))); } - #endif - + #endif + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 using propagate_on_container_copy_assignment = std::true_type; using propagate_on_container_move_assignment = std::true_type; using propagate_on_container_swap = std::true_type; using is_always_equal = std::true_type; template void construct(U* p, Args&& ...args) { ::new(p) U(std::forward(args)...); } - template void destroy(U* p) mi_attr_noexcept { p->~U(); } + template void destroy(U* p) mi_attr_noexcept { p->~U(); } #else void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } - void destroy(pointer p) { p->~value_type(); } + void destroy(pointer p) { 
p->~value_type(); } #endif size_type max_size() const mi_attr_noexcept { return (std::numeric_limits::max() / sizeof(value_type)); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4862c0ec..ce077d14 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 1.4 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +find_package(mimalloc 1.5 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") # overriding with a dynamic library From e3b16fe4efacaa220395bc671622d21c98cc17ec Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 22 Jan 2020 19:44:54 -0800 Subject: [PATCH 209/293] reduce type casts in atomic operations --- include/mimalloc-atomic.h | 85 ++++++++++++++++++++------------------- src/alloc.c | 4 +- src/arena.c | 16 ++++---- src/memory.c | 14 +++---- src/options.c | 12 +++--- src/os.c | 8 ++-- src/page.c | 14 +++---- src/segment.c | 10 ++--- src/stats.c | 22 +++++----- 9 files changed, 93 insertions(+), 92 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 5d140f0c..8577dbc5 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -23,18 +23,16 @@ terms of the MIT license. A copy of the license can be found in the file #include #endif -#define mi_atomic_cast(tp,x) (volatile _Atomic(tp)*)(x) - // ------------------------------------------------------ // Atomic operations specialized for mimalloc // ------------------------------------------------------ // Atomically add a 64-bit value; returns the previous value. // Note: not using _Atomic(int64_t) as it is only used for statistics. -static inline void mi_atomic_add64(volatile int64_t* p, int64_t add); +static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add); // Atomically add a value; returns the previous value. Memory ordering is relaxed. 
-static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add); +static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add); // Atomically "and" a value; returns the previous value. Memory ordering is relaxed. static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x); @@ -42,7 +40,6 @@ static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t // Atomically "or" a value; returns the previous value. Memory ordering is relaxed. static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x); - // Atomically compare and exchange a value; returns `true` if successful. // May fail spuriously. Memory ordering as release on success, and relaxed on failure. // (Note: expected and desired are in opposite order from atomic_compare_exchange) @@ -69,57 +66,57 @@ static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x); static inline void mi_atomic_yield(void); - -// Atomically add a value; returns the previous value. -static inline uintptr_t mi_atomic_addu(volatile _Atomic(uintptr_t)* p, uintptr_t add) { - return (uintptr_t)mi_atomic_add((volatile _Atomic(intptr_t)*)p, (intptr_t)add); -} // Atomically subtract a value; returns the previous value. -static inline uintptr_t mi_atomic_subu(volatile _Atomic(uintptr_t)* p, uintptr_t sub) { - return (uintptr_t)mi_atomic_add((volatile _Atomic(intptr_t)*)p, -((intptr_t)sub)); +static inline uintptr_t mi_atomic_sub(volatile _Atomic(uintptr_t)* p, uintptr_t sub) { + return mi_atomic_add(p, (uintptr_t)(-((intptr_t)sub))); } // Atomically increment a value; returns the incremented result. static inline uintptr_t mi_atomic_increment(volatile _Atomic(uintptr_t)* p) { - return mi_atomic_addu(p, 1); + return mi_atomic_add(p, 1); } // Atomically decrement a value; returns the decremented result. 
static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) { - return mi_atomic_subu(p, 1); + return mi_atomic_sub(p, 1); } -// Atomically read a pointer; Memory order is relaxed. -static inline void* mi_atomic_read_ptr_relaxed(volatile _Atomic(void*) const * p) { - return (void*)mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)p); +// Atomically add a signed value; returns the previous value. +static inline intptr_t mi_atomic_addi(volatile _Atomic(intptr_t)* p, intptr_t add) { + return (intptr_t)mi_atomic_add((volatile _Atomic(uintptr_t)*)p, (uintptr_t)add); } +// Atomically subtract a signed value; returns the previous value. +static inline intptr_t mi_atomic_subi(volatile _Atomic(intptr_t)* p, intptr_t sub) { + return (intptr_t)mi_atomic_addi(p,-sub); +} + +// Atomically read a pointer; Memory order is relaxed (i.e. no fence, only atomic). +#define mi_atomic_read_ptr_relaxed(T,p) \ + (T*)(mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)(p))) + // Atomically read a pointer; Memory order is acquire. -static inline void* mi_atomic_read_ptr(volatile _Atomic(void*) const * p) { - return (void*)mi_atomic_read((const volatile _Atomic(uintptr_t)*)p); -} +#define mi_atomic_read_ptr(T,p) \ + (T*)(mi_atomic_read((const volatile _Atomic(uintptr_t)*)(p))) -// Atomically write a pointer -static inline void mi_atomic_write_ptr(volatile _Atomic(void*)* p, void* x) { - mi_atomic_write((volatile _Atomic(uintptr_t)*)p, (uintptr_t)x ); -} +// Atomically write a pointer; Memory order is acquire. +#define mi_atomic_write_ptr(T,p,x) \ + mi_atomic_write((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)x)) // Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously. +// Memory order is release. 
(like a write) // (Note: expected and desired are in opposite order from atomic_compare_exchange) -static inline bool mi_atomic_cas_ptr_weak(volatile _Atomic(void*)* p, void* desired, void* expected) { - return mi_atomic_cas_weak((volatile _Atomic(uintptr_t)*)p, (uintptr_t)desired, (uintptr_t)expected); -} - -// Atomically compare and exchange a pointer; returns `true` if successful. +#define mi_atomic_cas_ptr_weak(T,p,desired,expected) \ + mi_atomic_cas_weak((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected))) + +// Atomically compare and exchange a pointer; returns `true` if successful. Memory order is acquire_release. // (Note: expected and desired are in opposite order from atomic_compare_exchange) -static inline bool mi_atomic_cas_ptr_strong(volatile _Atomic(void*)* p, void* desired, void* expected) { - return mi_atomic_cas_strong((volatile _Atomic(uintptr_t)*)p, (uintptr_t)desired, (uintptr_t)expected); -} +#define mi_atomic_cas_ptr_strong(T,p,desired,expected) \ + mi_atomic_cas_strong((volatile _Atomic(uintptr_t)*)(p),(uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected))) // Atomically exchange a pointer value. 
-static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exchange) { - return (void*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)p, (uintptr_t)exchange); -} +#define mi_atomic_exchange_ptr(T,p,exchange) \ + (T*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)exchange)) #ifdef _MSC_VER @@ -133,8 +130,8 @@ typedef LONG64 msc_intptr_t; typedef LONG msc_intptr_t; #define MI_64(f) f #endif -static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { - return (intptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); +static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) { + return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); } static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x); @@ -155,17 +152,21 @@ static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) { return *p; } static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) { - return mi_atomic_read(p); + return *p; } static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + #if defined(_M_IX86) || defined(_M_X64) + *p = x; + #else mi_atomic_exchange(p,x); + #endif } static inline void mi_atomic_yield(void) { YieldProcessor(); } -static inline void mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) { +static inline void mi_atomic_addi64(volatile _Atomic(int64_t)* p, int64_t add) { #ifdef _WIN64 - mi_atomic_add(p,add); + mi_atomic_addi(p,add); #else int64_t current; int64_t sum; @@ -182,11 +183,11 @@ static inline void mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) { #else #define MI_USING_STD #endif -static inline void mi_atomic_add64(volatile int64_t* p, int64_t add) { +static inline void mi_atomic_addi64(volatile int64_t* p, int64_t 
add) { MI_USING_STD atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed); } -static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { +static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) { MI_USING_STD return atomic_fetch_add_explicit(p, add, memory_order_relaxed); } diff --git a/src/alloc.c b/src/alloc.c index 20339204..847c1830 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -239,9 +239,9 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) mi_block_t* dfree; do { - dfree = (mi_block_t*)heap->thread_delayed_free; + dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]); - } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); + } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); } // and reset the MI_DELAYED_FREEING flag diff --git a/src/arena.c b/src/arena.c index f20a03e9..fde28685 100644 --- a/src/arena.c +++ b/src/arena.c @@ -55,7 +55,7 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); // A memory arena descriptor typedef struct mi_arena_s { - uint8_t* start; // the start of the memory area + _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) int numa_node; // associated NUMA node @@ -173,7 +173,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); // try numa affine allocation for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = 
(mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); + mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena==NULL) break; // end reached if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages @@ -185,7 +185,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, } // try from another numa node instead.. for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); + mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena==NULL) break; // end reached if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages @@ -226,7 +226,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { size_t bitmap_idx; mi_arena_id_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); - mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); + mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); if (arena == NULL) { _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); @@ -252,15 +252,15 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { static bool mi_arena_add(mi_arena_t* arena) { mi_assert_internal(arena != NULL); - mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); + mi_assert_internal((uintptr_t)mi_atomic_read_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - uintptr_t i = mi_atomic_addu(&mi_arena_count,1); + uintptr_t i = mi_atomic_increment(&mi_arena_count); if 
(i >= MI_MAX_ARENAS) { - mi_atomic_subu(&mi_arena_count, 1); + mi_atomic_decrement(&mi_arena_count); return false; } - mi_atomic_write_ptr(mi_atomic_cast(void*,&mi_arenas[i]), arena); + mi_atomic_write_ptr(mi_arena_t,&mi_arenas[i], arena); return true; } diff --git a/src/memory.c b/src/memory.c index 9603a26f..a442a35d 100644 --- a/src/memory.c +++ b/src/memory.c @@ -125,7 +125,7 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { if (p==NULL) return false; size_t count = mi_atomic_read_relaxed(&regions_count); for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_atomic_read_ptr_relaxed(&regions[i].start); + uint8_t* start = mi_atomic_read_ptr_relaxed(uint8_t,&regions[i].start); if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; } return false; @@ -133,9 +133,9 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { - void* start = mi_atomic_read_ptr(&region->start); + uint8_t* start = mi_atomic_read_ptr(uint8_t,&region->start); mi_assert_internal(start != NULL); - return ((uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE)); + return (start + (bit_idx * MI_SEGMENT_SIZE)); } static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { @@ -200,7 +200,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mi_atomic_write(&r->reset, 0); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); - mi_atomic_write_ptr(&r->start, start); + mi_atomic_write_ptr(uint8_t*,&r->start, start); // and share it mi_region_info_t info; @@ -277,14 +277,14 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_region_info_t info; info.value = mi_atomic_read(&region->info); - void* start = mi_atomic_read_ptr(&region->start); + uint8_t* start = mi_atomic_read_ptr(uint8_t,&region->start); mi_assert_internal(!(info.x.is_large && !*is_large)); 
mi_assert_internal(start != NULL); *is_zero = mi_bitmap_unclaim(&region->dirty, 1, blocks, bit_idx); *is_large = info.x.is_large; *memid = mi_memid_create(region, bit_idx); - void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); + void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); // commit if (*commit) { @@ -446,7 +446,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { } while(m == 0 && !mi_atomic_cas_weak(&region->in_use, MI_BITMAP_FIELD_FULL, 0 )); if (m == 0) { // on success, free the whole region - void* start = mi_atomic_read_ptr(&regions[i].start); + uint8_t* start = mi_atomic_read_ptr(uint8_t,&regions[i].start); size_t arena_memid = mi_atomic_read_relaxed(&regions[i].arena_memid); memset(&regions[i], 0, sizeof(mem_region_t)); // and release the whole region diff --git a/src/options.c b/src/options.c index b06cbdb4..76cdbef0 100644 --- a/src/options.c +++ b/src/options.c @@ -169,7 +169,7 @@ static void mi_out_buf(const char* msg, void* arg) { size_t n = strlen(msg); if (n==0) return; // claim space - uintptr_t start = mi_atomic_addu(&out_len, n); + uintptr_t start = mi_atomic_add(&out_len, n); if (start >= MI_MAX_DELAY_OUTPUT) return; // check bound if (start+n >= MI_MAX_DELAY_OUTPUT) { @@ -181,7 +181,7 @@ static void mi_out_buf(const char* msg, void* arg) { static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { if (out==NULL) return; // claim (if `no_more_buf == true`, no more output will be added after this point) - size_t count = mi_atomic_addu(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); + size_t count = mi_atomic_add(&out_len, (no_more_buf ? 
MI_MAX_DELAY_OUTPUT : 1)); // and output the current contents if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; @@ -212,14 +212,14 @@ static mi_output_fun* volatile mi_out_default; // = NULL static volatile _Atomic(void*) mi_out_arg; // = NULL static mi_output_fun* mi_out_get_default(void** parg) { - if (parg != NULL) { *parg = mi_atomic_read_ptr(&mi_out_arg); } + if (parg != NULL) { *parg = mi_atomic_read_ptr(void,&mi_out_arg); } mi_output_fun* out = mi_out_default; return (out == NULL ? &mi_out_buf : out); } void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer - mi_atomic_write_ptr(&mi_out_arg, arg); + mi_atomic_write_ptr(void,&mi_out_arg, arg); if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now } @@ -328,7 +328,7 @@ static void mi_error_default(int err) { void mi_register_error(mi_error_fun* fun, void* arg) { mi_error_handler = fun; // can be NULL - mi_atomic_write_ptr(&mi_error_arg, arg); + mi_atomic_write_ptr(void,&mi_error_arg, arg); } void _mi_error_message(int err, const char* fmt, ...) { @@ -339,7 +339,7 @@ void _mi_error_message(int err, const char* fmt, ...) { va_end(args); // and call the error handler which may abort (or return normally) if (mi_error_handler != NULL) { - mi_error_handler(err, mi_atomic_read_ptr(&mi_error_arg)); + mi_error_handler(err, mi_atomic_read_ptr(void,&mi_error_arg)); } else { mi_error_default(err); diff --git a/src/os.c b/src/os.c index be507b69..6e8c12d8 100644 --- a/src/os.c +++ b/src/os.c @@ -397,20 +397,20 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro // On 64-bit systems, we can do efficient aligned allocation by using // the 4TiB to 30TiB area to allocate them. 
#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED))) -static volatile _Atomic(intptr_t) aligned_base; +static volatile _Atomic(uintptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; if ((size%MI_SEGMENT_SIZE) != 0) return NULL; - intptr_t hint = mi_atomic_add(&aligned_base, size); + uintptr_t hint = mi_atomic_add(&aligned_base, size); if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) - intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area + uintptr_t init = ((uintptr_t)4 << 40); // start at 4TiB area #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif - mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); + mi_atomic_cas_strong(&aligned_base, init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } if (hint%try_alignment != 0) return NULL; diff --git a/src/page.c b/src/page.c index 7840a590..5ac5d9a6 100644 --- a/src/page.c +++ b/src/page.c @@ -278,11 +278,11 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { (put there by other threads if they deallocated in a full page) ----------------------------------------------------------- */ void _mi_heap_delayed_free(mi_heap_t* heap) { - // take over the list + // take over the list (note: no atomic exchange is it is often NULL) mi_block_t* block; do { - block = (mi_block_t*)heap->thread_delayed_free; - } while (block != NULL && 
!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), NULL, block)); + block = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); + } while (block != NULL && !mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, NULL, block)); // and free them all while(block != NULL) { @@ -293,9 +293,9 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // reset the delayed_freeing flag; in that case delay it further by reinserting. mi_block_t* dfree; do { - dfree = (mi_block_t*)heap->thread_delayed_free; + dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]); - } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); + } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); } block = next; } @@ -728,14 +728,14 @@ void _mi_deferred_free(mi_heap_t* heap, bool force) { heap->tld->heartbeat++; if (deferred_free != NULL && !heap->tld->recurse) { heap->tld->recurse = true; - deferred_free(force, heap->tld->heartbeat, mi_atomic_read_ptr_relaxed(&deferred_arg)); + deferred_free(force, heap->tld->heartbeat, mi_atomic_read_ptr_relaxed(void,&deferred_arg)); heap->tld->recurse = false; } } void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept { deferred_free = fn; - mi_atomic_write_ptr(&deferred_arg, arg); + mi_atomic_write_ptr(void,&deferred_arg, arg); } diff --git a/src/segment.c b/src/segment.c index ea030d7a..a76871d0 100644 --- a/src/segment.c +++ b/src/segment.c @@ -822,7 +822,7 @@ static void mi_segments_prepend_abandoned(mi_segment_t* first) { if (first == NULL) return; // first try if the abandoned list happens to be NULL - if (mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, NULL)) return; + if (mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, NULL)) return; // if not, find the end of the list mi_segment_t* last = first; @@ 
-833,9 +833,9 @@ static void mi_segments_prepend_abandoned(mi_segment_t* first) { // and atomically prepend mi_segment_t* next; do { - next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); + next = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); last->abandoned_next = next; - } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, next)); + } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, next)); } static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { @@ -877,9 +877,9 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) { // To avoid the A-B-A problem, grab the entire list atomically - mi_segment_t* segment = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); // pre-read to avoid expensive atomic operations + mi_segment_t* segment = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); // pre-read to avoid expensive atomic operations if (segment == NULL) return false; - segment = (mi_segment_t*)mi_atomic_exchange_ptr(mi_atomic_cast(void*, &abandoned), NULL); + segment = mi_atomic_exchange_ptr(mi_segment_t, &abandoned, NULL); if (segment == NULL) return false; // we got a non-empty list diff --git a/src/stats.c b/src/stats.c index 57599821..a1404502 100644 --- a/src/stats.c +++ b/src/stats.c @@ -26,13 +26,13 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { if (mi_is_in_main(stat)) { // add atomically (for abandoned pages) - mi_atomic_add64(&stat->current,amount); + mi_atomic_addi64(&stat->current,amount); if (stat->current > stat->peak) stat->peak = stat->current; // racing.. 
it's ok if (amount > 0) { - mi_atomic_add64(&stat->allocated,amount); + mi_atomic_addi64(&stat->allocated,amount); } else { - mi_atomic_add64(&stat->freed, -amount); + mi_atomic_addi64(&stat->freed, -amount); } } else { @@ -50,8 +50,8 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { if (mi_is_in_main(stat)) { - mi_atomic_add64( &stat->count, 1 ); - mi_atomic_add64( &stat->total, (int64_t)amount ); + mi_atomic_addi64( &stat->count, 1 ); + mi_atomic_addi64( &stat->total, (int64_t)amount ); } else { stat->count++; @@ -70,17 +70,17 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { // must be thread safe as it is called from stats_merge static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { if (stat==src) return; - mi_atomic_add64( &stat->allocated, src->allocated * unit); - mi_atomic_add64( &stat->current, src->current * unit); - mi_atomic_add64( &stat->freed, src->freed * unit); + mi_atomic_addi64( &stat->allocated, src->allocated * unit); + mi_atomic_addi64( &stat->current, src->current * unit); + mi_atomic_addi64( &stat->freed, src->freed * unit); // peak scores do not work across threads.. 
- mi_atomic_add64( &stat->peak, src->peak * unit); + mi_atomic_addi64( &stat->peak, src->peak * unit); } static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) { if (stat==src) return; - mi_atomic_add64( &stat->total, src->total * unit); - mi_atomic_add64( &stat->count, src->count * unit); + mi_atomic_addi64( &stat->total, src->total * unit); + mi_atomic_addi64( &stat->count, src->count * unit); } // must be thread safe as it is called from stats_merge From 0193a15f7e602ae081dd97f1d5f099dd4e05266a Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 22 Jan 2020 19:46:43 -0800 Subject: [PATCH 210/293] nicer message on huge OS page reservation --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index fde28685..acb92243 100644 --- a/src/arena.c +++ b/src/arena.c @@ -282,7 +282,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; } - _mi_verbose_message("reserved %zu gb huge pages (of the %zu gb requested)\n", pages_reserved, pages); + _mi_verbose_message("reserved %zu gb huge pages on numa node %i (of the %zu gb requested)\n", pages_reserved, numa_node, pages); size_t bcount = mi_block_count_of_size(hsize); size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); From 3bbbe6c686f33040022030a81437aaf694e26e08 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 22 Jan 2020 20:12:42 -0800 Subject: [PATCH 211/293] enable atomic yield when delayed_freeing is encountered --- src/heap.c | 2 +- src/page.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/heap.c b/src/heap.c index 12aa0840..bdd833c3 100644 --- a/src/heap.c +++ b/src/heap.c @@ -147,7 +147,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); - 
mi_assert_internal( collect != ABANDON || heap->thread_delayed_free == NULL ); + mi_assert_internal( collect != ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL ); // collect segment caches if (collect >= FORCE) { diff --git a/src/page.c b/src/page.c index 5ac5d9a6..fb75b826 100644 --- a/src/page.c +++ b/src/page.c @@ -130,7 +130,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { - // mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. + mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail } else if (delay == old_delay) { From 66818bf632fb3197019951f9028d38c3e9da44f6 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 09:57:55 -0800 Subject: [PATCH 212/293] use atomic yield on delayed-freeing; clarify code --- src/heap.c | 46 +++++++++++++++++++++++----------------------- src/page.c | 7 ++++--- src/segment.c | 20 ++++++++++---------- test/test-stress.c | 4 ++-- 4 files changed, 39 insertions(+), 38 deletions(-) diff --git a/src/heap.c b/src/heap.c index bdd833c3..2a4f98af 100644 --- a/src/heap.c +++ b/src/heap.c @@ -76,9 +76,9 @@ static bool mi_heap_is_valid(mi_heap_t* heap) { ----------------------------------------------------------- */ typedef enum mi_collect_e { - NORMAL, - FORCE, - ABANDON + MI_NORMAL, + MI_FORCE, + MI_ABANDON } mi_collect_t; @@ -87,12 +87,13 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t UNUSED(heap); mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); mi_collect_t collect = *((mi_collect_t*)arg_collect); - _mi_page_free_collect(page, collect >= ABANDON); + _mi_page_free_collect(page, collect >= MI_FORCE); if (mi_page_all_free(page)) { - // no more used blocks, free 
the page. TODO: should we retire here and be less aggressive? - _mi_page_free(page, pq, collect != NORMAL); + // no more used blocks, free the page. + // note: this will free retired pages as well. + _mi_page_free(page, pq, collect >= MI_FORCE); } - else if (collect == ABANDON) { + else if (collect == MI_ABANDON) { // still used blocks but the thread is done; abandon the page _mi_page_abandon(page, pq); } @@ -111,61 +112,60 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) { if (!mi_heap_is_initialized(heap)) return; - _mi_deferred_free(heap, collect > NORMAL); + _mi_deferred_free(heap, collect >= MI_FORCE); // collect (some) abandoned pages - if (collect >= NORMAL && !heap->no_reclaim) { - if (collect == NORMAL) { + if (collect >= MI_NORMAL && !heap->no_reclaim) { + if (collect == MI_NORMAL) { // this may free some segments (but also take ownership of abandoned pages) _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments); } else if ( #ifdef NDEBUG - collect == FORCE + collect == MI_FORCE #else - collect >= FORCE + collect >= MI_FORCE #endif && _mi_is_main_thread() && mi_heap_is_backing(heap)) { - // the main thread is abandoned, try to free all abandoned segments. + // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. // if all memory is freed by now, all segments should be freed. _mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); } } // if abandoning, mark all pages to no longer add to delayed_free - if (collect == ABANDON) { - //for (mi_page_t* page = heap->pages[MI_BIN_FULL].first; page != NULL; page = page->next) { - // _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE - //} + if (collect == MI_ABANDON) { mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL); } // free thread delayed blocks. 
- // (if abandoning, after this there are no more local references into the pages.) + // (if abandoning, after this there are no more thread-delayed references into the pages.) _mi_heap_delayed_free(heap); // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); - mi_assert_internal( collect != ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL ); + mi_assert_internal( collect != MI_ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL ); // collect segment caches - if (collect >= FORCE) { + if (collect >= MI_FORCE) { _mi_segment_thread_collect(&heap->tld->segments); } + #ifndef NDEBUG // collect regions - if (collect >= FORCE && _mi_is_main_thread()) { + if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) { _mi_mem_collect(&heap->tld->os); } + #endif } void _mi_heap_collect_abandon(mi_heap_t* heap) { - mi_heap_collect_ex(heap, ABANDON); + mi_heap_collect_ex(heap, MI_ABANDON); } void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept { - mi_heap_collect_ex(heap, (force ? FORCE : NORMAL)); + mi_heap_collect_ex(heap, (force ? MI_FORCE : MI_NORMAL)); } void mi_collect(bool force) mi_attr_noexcept { diff --git a/src/page.c b/src/page.c index fb75b826..149926e8 100644 --- a/src/page.c +++ b/src/page.c @@ -126,12 +126,12 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid mi_thread_free_t tfreex; mi_delayed_t old_delay; do { - tfree = mi_atomic_read(&page->xthread_free); + tfree = mi_atomic_read(&page->xthread_free); // note: must acquire as we can break this loop and not do a CAS tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. 
- tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail + // tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail } else if (delay == old_delay) { break; // avoid atomic operation if already equal @@ -139,7 +139,8 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) { break; // leave never-delayed flag set } - } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); + } while ((old_delay == MI_DELAYED_FREEING) || + !mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); } /* ----------------------------------------------------------- diff --git a/src/segment.c b/src/segment.c index a76871d0..85e8817b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -824,18 +824,18 @@ static void mi_segments_prepend_abandoned(mi_segment_t* first) { // first try if the abandoned list happens to be NULL if (mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, NULL)) return; - // if not, find the end of the list + // if not, find the end of the argument list mi_segment_t* last = first; while (last->abandoned_next != NULL) { last = last->abandoned_next; } // and atomically prepend - mi_segment_t* next; + mi_segment_t* anext; do { - next = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); - last->abandoned_next = next; - } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, next)); + anext = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); + last->abandoned_next = anext; + } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, anext)); } static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { @@ -897,14 +897,14 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen atmost--; } // split the list and push back the remaining segments - mi_segment_t* next = last->abandoned_next; + mi_segment_t* anext = last->abandoned_next; 
last->abandoned_next = NULL; - mi_segments_prepend_abandoned(next); + mi_segments_prepend_abandoned(anext); } // reclaim all segments that we kept while(segment != NULL) { - mi_segment_t* const next = segment->abandoned_next; // save the next segment + mi_segment_t* const anext = segment->abandoned_next; // save the next segment // got it. mi_atomic_decrement(&abandoned_count); @@ -943,7 +943,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen } } mi_assert(segment->abandoned == 0); - if (segment->used == 0) { // due to page_clear + if (segment->used == 0) { // due to page_clear's mi_segment_free(segment,false,tld); } else { @@ -954,7 +954,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen } // go on - segment = next; + segment = anext; } return true; diff --git a/test/test-stress.c b/test/test-stress.c index 83f9b87b..28bd4a56 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -277,12 +277,12 @@ static void run_os_threads(size_t nthreads) { #ifdef __cplusplus #include <atomic> static void* atomic_exchange_ptr(volatile void** p, void* newval) { - return std::atomic_exchange_explicit((volatile std::atomic<void*>*)p, newval, std::memory_order_acquire); + return std::atomic_exchange((volatile std::atomic<void*>*)p, newval); } #else #include <stdatomic.h> static void* atomic_exchange_ptr(volatile void** p, void* newval) { - return atomic_exchange_explicit((volatile _Atomic(void*)*)p, newval, memory_order_acquire); + return atomic_exchange((volatile _Atomic(void*)*)p, newval); } #endif From 0316859e0666bc7138e45789d71d2829656f85f3 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 10:03:03 -0800 Subject: [PATCH 213/293] improve codegen for mi_free --- src/alloc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 847c1830..3f577f2f 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -21,7 +21,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. -extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { +extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* block = page->free; if (mi_unlikely(block == NULL)) { @@ -290,7 +290,8 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p } -static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool local, void* p) { +static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) { + mi_page_t* page = _mi_segment_page_of(segment, p); mi_block_t* block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); _mi_free_block(page, local, block); } @@ -338,7 +339,7 @@ void mi_free(void* p) mi_attr_noexcept if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned - mi_block_t* block = (mi_block_t*)p; + mi_block_t* const block = (mi_block_t*)p; if (mi_unlikely(mi_check_is_double_free(page,block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; @@ -349,7 +350,8 @@ void mi_free(void* p) mi_attr_noexcept } else { // non-local, aligned blocks, or a full page; use the more generic path - mi_free_generic(segment, page, tid == segment->thread_id, p); + // note: recalc page in generic to improve code generation + mi_free_generic(segment, tid == segment->thread_id, p); } } From 6fb434a99b72838f53f75899076e3cd949b9fb57 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 10:31:47 -0800 Subject: [PATCH 214/293] use -fvisibility=hidden on clang 
as well --- CMakeLists.txt | 3 +-- include/mimalloc-internal.h | 2 +- include/mimalloc.h | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 366ffc44..95318a0e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,10 +107,9 @@ endif() # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") - list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas) + list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden) if(CMAKE_C_COMPILER_ID MATCHES "GNU") list(APPEND mi_cflags -Wno-invalid-memory-model) - list(APPEND mi_cflags -fvisibility=hidden) endif() endif() diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index eaa327be..88a0f86d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -23,7 +23,7 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(_MSC_VER) #pragma warning(disable:4127) // constant conditional due to MI_SECURE paths #define mi_decl_noinline __declspec(noinline) -#elif defined(__GNUC__) || defined(__clang__) +#elif (defined(__GNUC__) && (__GNUC__>=3)) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #else #define mi_decl_noinline diff --git a/include/mimalloc.h b/include/mimalloc.h index 1c77d462..7cf455e6 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -43,7 +43,7 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) #define mi_attr_alloc_align(p) -#elif defined(__GNUC__) || defined(__clang__) +#elif defined(__GNUC__) // includes clang and icc #define mi_cdecl // leads to warnings... 
__attribute__((cdecl)) #define mi_decl_thread __thread #define mi_decl_export __attribute__((visibility("default"))) From cdc34595cfd3c26aa0d366fb70199509846b40db Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 10:52:27 -0800 Subject: [PATCH 215/293] fix warning in msvc --- ide/vs2017/mimalloc-override-test.vcxproj | 2 +- ide/vs2017/mimalloc.vcxproj | 4 ++-- ide/vs2019/mimalloc-override-test.vcxproj | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ide/vs2017/mimalloc-override-test.vcxproj b/ide/vs2017/mimalloc-override-test.vcxproj index b8e2648b..faaa00e3 100644 --- a/ide/vs2017/mimalloc-override-test.vcxproj +++ b/ide/vs2017/mimalloc-override-test.vcxproj @@ -112,7 +112,7 @@ true ..\..\include MultiThreadedDebugDLL - false + Sync Default false diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 55f37392..e08deec4 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -111,7 +111,7 @@ - Level3 + Level4 Disabled true true @@ -165,7 +165,7 @@ - Level3 + Level4 MaxSpeed true true diff --git a/ide/vs2019/mimalloc-override-test.vcxproj b/ide/vs2019/mimalloc-override-test.vcxproj index 79adedb0..a2497a19 100644 --- a/ide/vs2019/mimalloc-override-test.vcxproj +++ b/ide/vs2019/mimalloc-override-test.vcxproj @@ -90,7 +90,7 @@ true ..\..\include MultiThreadedDebugDLL - false + Sync Default false @@ -112,7 +112,7 @@ true ..\..\include MultiThreadedDebugDLL - false + Sync Default false From c9106e74a8bd50d8da2360c19741c74ac1cd0592 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 11:06:25 -0800 Subject: [PATCH 216/293] remove __thread attribute from mimalloc.h --- include/mimalloc-internal.h | 9 ++++++--- include/mimalloc.h | 3 --- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 88a0f86d..6fca06b8 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -21,12 +21,15 @@ terms of the MIT 
license. A copy of the license can be found in the file #endif #if defined(_MSC_VER) -#pragma warning(disable:4127) // constant conditional due to MI_SECURE paths -#define mi_decl_noinline __declspec(noinline) +#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) +#define mi_decl_noinline __declspec(noinline) +#define mi_decl_thread __declspec(thread) #elif (defined(__GNUC__) && (__GNUC__>=3)) // includes clang and icc -#define mi_decl_noinline __attribute__((noinline)) +#define mi_decl_noinline __attribute__((noinline)) +#define mi_decl_thread __thread #else #define mi_decl_noinline +#define mi_decl_thread __thread // hope for the best :-) #endif diff --git a/include/mimalloc.h b/include/mimalloc.h index 7cf455e6..94fcd788 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -38,14 +38,12 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_allocator __declspec(restrict) #endif #define mi_cdecl __cdecl - #define mi_decl_thread __declspec(thread) #define mi_attr_malloc #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) #define mi_attr_alloc_align(p) #elif defined(__GNUC__) // includes clang and icc #define mi_cdecl // leads to warnings... __attribute__((cdecl)) - #define mi_decl_thread __thread #define mi_decl_export __attribute__((visibility("default"))) #define mi_decl_allocator #define mi_attr_malloc __attribute__((malloc)) @@ -64,7 +62,6 @@ terms of the MIT license. 
A copy of the license can be found in the file #endif #else #define mi_cdecl - #define mi_decl_thread __thread #define mi_decl_export #define mi_decl_allocator #define mi_attr_malloc From 76e727f7d1828d02c51b1c0266dca9eeb61ede2d Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 15:39:38 -0800 Subject: [PATCH 217/293] fix assertion on page destroy --- src/heap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/heap.c b/src/heap.c index 2a4f98af..ab55efae 100644 --- a/src/heap.c +++ b/src/heap.c @@ -274,6 +274,9 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ page->used = 0; // and free the page + // mi_page_free(page,false); + page->next = NULL; + page->prev = NULL; _mi_segment_page_free(page,false /* no force? */, &heap->tld->segments); return true; // keep going From 12701b1aac8d66ffe92bd1f80bc401d285fa32a4 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 15:48:51 -0800 Subject: [PATCH 218/293] do not reclaim segments on collect --- src/heap.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/heap.c b/src/heap.c index ab55efae..1f436b06 100644 --- a/src/heap.c +++ b/src/heap.c @@ -114,25 +114,20 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) if (!mi_heap_is_initialized(heap)) return; _mi_deferred_free(heap, collect >= MI_FORCE); - // collect (some) abandoned pages - if (collect >= MI_NORMAL && !heap->no_reclaim) { - if (collect == MI_NORMAL) { - // this may free some segments (but also take ownership of abandoned pages) - _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments); - } - else if ( - #ifdef NDEBUG - collect == MI_FORCE - #else - collect >= MI_FORCE - #endif - && _mi_is_main_thread() && mi_heap_is_backing(heap)) - { - // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. - // if all memory is freed by now, all segments should be freed. 
- _mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); - } + // note: never reclaim on collect but leave it to threads that need storage to reclaim + if ( + #ifdef NDEBUG + collect == MI_FORCE + #else + collect >= MI_FORCE + #endif + && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim) + { + // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. + // if all memory is freed by now, all segments should be freed. + _mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); } + // if abandoning, mark all pages to no longer add to delayed_free if (collect == MI_ABANDON) { From f8ab4bd7dc6467ae15e1f61968a46d220d94c0d5 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 19:49:32 -0800 Subject: [PATCH 219/293] add leak test --- test/test-stress.c | 91 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 27 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index 28bd4a56..67ec9f05 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -37,11 +37,11 @@ static size_t use_one_size = 0; // use single object size of `N * s #ifdef USE_STD_MALLOC -#define custom_malloc(s) malloc(s) +#define custom_calloc(n,s) calloc(n,s) #define custom_realloc(p,s) realloc(p,s) #define custom_free(p) free(p) #else -#define custom_malloc(s) mi_malloc(s) +#define custom_calloc(n,s) mi_calloc(n,s) #define custom_realloc(p,s) mi_realloc(p,s) #define custom_free(p) mi_free(p) #endif @@ -94,9 +94,12 @@ static void* alloc_items(size_t items, random_t r) { } if (items == 40) items++; // pthreads uses that size for stack increases if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t)); - uintptr_t* p = (uintptr_t*)custom_malloc(items * sizeof(uintptr_t)); + if (items==0) items = 1; + uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t)); if (p != NULL) { - for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + for (uintptr_t i = 0; i < 
items; i++) { + p[i] = (items - i) ^ cookie; + } } return p; } @@ -126,7 +129,7 @@ static void stress(intptr_t tid) { void** data = NULL; size_t data_size = 0; size_t data_top = 0; - void** retained = (void**)custom_malloc(retain * sizeof(void*)); + void** retained = (void**)custom_calloc(retain,sizeof(void*)); size_t retain_top = 0; while (allocs > 0 || retain > 0) { @@ -171,7 +174,45 @@ static void stress(intptr_t tid) { //bench_end_thread(); } -static void run_os_threads(size_t nthreads); +static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid)); + +static void test_stress(void) { + uintptr_t r = 43 * 43; + for (int n = 0; n < ITER; n++) { + run_os_threads(THREADS, &stress); + for (int i = 0; i < TRANSFERS; i++) { + if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers + void* p = atomic_exchange_ptr(&transfer[i], NULL); + free_items(p); + } + } + mi_collect(false); +#ifndef NDEBUG + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } +#endif + } +} + +static void leak(intptr_t tid) { + uintptr_t r = 43*tid; + void* p = alloc_items(pick(&r)%128, &r); + if (chance(10, &r)) { + intptr_t i = (pick(&r) % TRANSFERS); + void* q = atomic_exchange_ptr(&transfer[i], p); + free_items(q); + } +} + +static void test_leak(void) { + for (int n = 0; n < ITER; n++) { + run_os_threads(THREADS, &leak); + mi_collect(false); +#ifndef NDEBUG + //if ((n + 1) % 10 == 0) + { printf("- iterations left: %3d\n", ITER - (n + 1)); } +#endif + } +} int main(int argc, char** argv) { // > mimalloc-test-stress [THREADS] [SCALE] [ITER] @@ -198,19 +239,11 @@ int main(int argc, char** argv) { // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. 
mi_stats_reset(); - uintptr_t r = 43 * 43; - for (int n = 0; n < ITER; n++) { - run_os_threads(THREADS); - for (int i = 0; i < TRANSFERS; i++) { - if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers - void* p = atomic_exchange_ptr(&transfer[i], NULL); - free_items(p); - } - } - mi_collect(false); -#ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } -#endif + if (true) { + test_stress(); + } + else { + test_leak(); } mi_collect(true); @@ -220,18 +253,21 @@ int main(int argc, char** argv) { } +static void (*thread_entry_fun)(intptr_t) = &stress; + #ifdef _WIN32 #include static DWORD WINAPI thread_entry(LPVOID param) { - stress((intptr_t)param); + thread_entry_fun((intptr_t)param); return 0; } -static void run_os_threads(size_t nthreads) { - DWORD* tids = (DWORD*)custom_malloc(nthreads * sizeof(DWORD)); - HANDLE* thandles = (HANDLE*)custom_malloc(nthreads * sizeof(HANDLE)); +static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) { + thread_entry_fun = fun; + DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD)); + HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE)); for (uintptr_t i = 0; i < nthreads; i++) { thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]); } @@ -246,7 +282,7 @@ static void run_os_threads(size_t nthreads) { } static void* atomic_exchange_ptr(volatile void** p, void* newval) { -#if (INTPTR_MAX == UINT32_MAX) +#if (INTPTR_MAX == INT32_MAX) return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval); #else return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval); @@ -257,12 +293,13 @@ static void* atomic_exchange_ptr(volatile void** p, void* newval) { #include static void* thread_entry(void* param) { - stress((uintptr_t)param); + thread_entry_fun((uintptr_t)param); return NULL; } -static void run_os_threads(size_t nthreads) { - pthread_t* threads = 
(pthread_t*)custom_malloc(nthreads * sizeof(pthread_t)); +static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) { + thread_entry_fun = fun; + pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t)); memset(threads, 0, sizeof(pthread_t) * nthreads); //pthread_setconcurrency(nthreads); for (uintptr_t i = 0; i < nthreads; i++) { From 4a2a0c2d503ad5334555f4f86d7f0128b3676aae Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 19:50:35 -0800 Subject: [PATCH 220/293] initial abandon based on fine-grained reclamation --- include/mimalloc-internal.h | 7 +- src/heap.c | 2 +- src/memory.c | 4 +- src/page.c | 49 ++--- src/segment.c | 365 +++++++++++++++++++++++++----------- 5 files changed, 289 insertions(+), 138 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 6fca06b8..3335414a 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -75,12 +75,13 @@ bool _mi_mem_unprotect(void* addr, size_t size); void _mi_mem_collect(mi_os_tld_t* tld); // "segment.c" -mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); -bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); -void _mi_segment_thread_collect(mi_segments_tld_t* tld); uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page +void _mi_segment_thread_collect(mi_segments_tld_t* tld); +void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); +void _mi_abandoned_await_readers(void); // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size) 
mi_attr_noexcept mi_attr_malloc; diff --git a/src/heap.c b/src/heap.c index 1f436b06..e76a147c 100644 --- a/src/heap.c +++ b/src/heap.c @@ -125,7 +125,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) { // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. // if all memory is freed by now, all segments should be freed. - _mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); + _mi_abandoned_reclaim_all(heap, &heap->tld->segments); } diff --git a/src/memory.c b/src/memory.c index a442a35d..c7388054 100644 --- a/src/memory.c +++ b/src/memory.c @@ -419,6 +419,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re bool any_unreset; mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); if (any_unreset) { + _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit) _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); } } @@ -451,7 +452,8 @@ void _mi_mem_collect(mi_os_tld_t* tld) { memset(®ions[i], 0, sizeof(mem_region_t)); // and release the whole region mi_atomic_write(®ion->info, 0); - if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { + if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { + _mi_abandoned_await_readers(); // ensure no pending reads _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); } } diff --git a/src/page.c b/src/page.c index 149926e8..c5b86b08 100644 --- a/src/page.c +++ b/src/page.c @@ -37,7 +37,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); - +static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); #if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { @@ -242,32 +242,37 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { // allocate a fresh page from a segment static mi_page_t* 
mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) { mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq)); - mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os); - if (page == NULL) return NULL; - mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_page_init(heap, page, block_size, heap->tld); - _mi_stat_increase( &heap->tld->stats.pages, 1); - if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL - mi_assert_expensive(_mi_page_is_valid(page)); - return page; + mi_assert_internal(pq==NULL||block_size == pq->block_size); + mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os); + if (page == NULL) { + // this may be out-of-memory, or a page was reclaimed + if (pq!=NULL && (page = pq->first) != NULL) { + mi_assert_expensive(_mi_page_is_valid(page)); + if (!mi_page_immediate_available(page)) { + mi_page_extend_free(heap, page, heap->tld); + } + mi_assert_internal(mi_page_immediate_available(page)); + if (mi_page_immediate_available(page)) { + return page; // reclaimed page + } + } + return NULL; // out-of-memory + } + else { + // a fresh page was allocated, initialize it + mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); + mi_page_init(heap, page, block_size, heap->tld); + _mi_stat_increase(&heap->tld->stats.pages, 1); + if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL + mi_assert_expensive(_mi_page_is_valid(page)); + return page; + } } // Get a fresh page to use static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { mi_assert_internal(mi_heap_contains_queue(heap, pq)); - - // try to reclaim an abandoned page first - mi_page_t* page = pq->first; - if (!heap->no_reclaim && - _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments) && - page != pq->first) - { - // we reclaimed, and we got lucky with a 
reclaimed page in our queue - page = pq->first; - if (page->free != NULL) return page; - } - // otherwise allocate the page - page = mi_page_fresh_alloc(heap, pq, pq->block_size); + mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size); if (page==NULL) return NULL; mi_assert_internal(pq->block_size==mi_page_block_size(page)); mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); diff --git a/src/segment.c b/src/segment.c index 85e8817b..95ae6d8b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -743,7 +743,9 @@ static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); -static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { +// clear page data; can be called on abandoned segments +static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool allow_reset, mi_segments_tld_t* tld) +{ mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); @@ -773,7 +775,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg segment->used--; // add to the free page list for reuse/reset - if (segment->page_kind <= MI_PAGE_MEDIUM) { + if (allow_reset && segment->page_kind <= MI_PAGE_MEDIUM) { mi_pages_reset_add(segment, page, tld); } } @@ -786,7 +788,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) mi_reset_delayed(tld); // mark it as free now - mi_segment_page_clear(segment, page, tld); + mi_segment_page_clear(segment, page, true, tld); if (segment->used == 0) { // no more used pages; remove from the free list and free the segment @@ -814,39 +816,122 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) // live blocks (reached through other threads). 
Such segments // are "abandoned" and will be reclaimed by other threads to // reuse their pages and/or free them eventually -static volatile _Atomic(mi_segment_t*) abandoned; // = NULL; -static volatile _Atomic(uintptr_t) abandoned_count; // = 0; approximate count of abandoned segments -// prepend a list of abandoned segments atomically to the global abandoned list; O(n) -static void mi_segments_prepend_abandoned(mi_segment_t* first) { - if (first == NULL) return; - // first try if the abandoned list happens to be NULL - if (mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, NULL)) return; +#define MI_TAGGED_MASK MI_SEGMENT_MASK +typedef uintptr_t mi_tagged_segment_t; - // if not, find the end of the argument list +static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) { + return (mi_segment_t*)(ts & ~MI_TAGGED_MASK); +} + +static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) { + mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0); + if (segment==NULL) return 0; // no need to tag NULL + uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK; + return ((uintptr_t)segment | tag); +} + +static volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL +static volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL +static volatile _Atomic(uintptr_t) abandoned_readers; // = 0 + +static void mi_abandoned_visited_push(mi_segment_t* segment) { + mi_assert_internal(segment->thread_id == 0); + mi_assert_internal(segment->abandoned_next == NULL); + mi_assert_internal(segment->next == NULL && segment->prev == NULL); + mi_assert_internal(segment->used > 0); + mi_segment_t* anext; + do { + anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited); + segment->abandoned_next = anext; + } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, segment, anext)); +} + +static bool mi_abandoned_visited_revisit(void) { + // grab the whole visited list + mi_segment_t* first = 
mi_atomic_exchange_ptr(mi_segment_t, &abandoned_visited, NULL); + if (first == NULL) return false; + + // first try to swap directly if the abandoned list happens to be NULL + mi_tagged_segment_t afirst = mi_tagged_segment(first,0); + if (mi_atomic_cas_weak(&abandoned, afirst, 0)) return true; + + // find the last element of the visited list: O(n) mi_segment_t* last = first; while (last->abandoned_next != NULL) { last = last->abandoned_next; } - // and atomically prepend - mi_segment_t* anext; + // and atomically prepend to the abandoned list + // (no need to increase the readers as we don't access the abandoned segments) + mi_tagged_segment_t anext; do { - anext = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); - last->abandoned_next = anext; - } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, anext)); + anext = mi_atomic_read_relaxed(&abandoned); + last->abandoned_next = mi_tagged_segment_ptr(anext); + afirst = mi_tagged_segment(first, anext); + } while (!mi_atomic_cas_weak(&abandoned, afirst, anext)); + return true; } +static void mi_abandoned_push(mi_segment_t* segment) { + mi_assert_internal(segment->thread_id == 0); + mi_assert_internal(segment->abandoned_next == NULL); + mi_assert_internal(segment->next == NULL && segment->prev == NULL); + mi_assert_internal(segment->used > 0); + mi_tagged_segment_t ts; + mi_tagged_segment_t next; + do { + ts = mi_atomic_read_relaxed(&abandoned); + segment->abandoned_next = mi_tagged_segment_ptr(ts); + next = mi_tagged_segment(segment, ts); + } while (!mi_atomic_cas_weak(&abandoned, next, ts)); +} + +void _mi_abandoned_await_readers(void) { + uintptr_t n; + do { + n = mi_atomic_read(&abandoned_readers); + if (n != 0) mi_atomic_yield(); + } while (n != 0); +} + +static mi_segment_t* mi_abandoned_pop(void) { + mi_segment_t* segment; + mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); + segment = mi_tagged_segment_ptr(ts); + if (segment == NULL) { + if (!mi_abandoned_visited_revisit()) return 
NULL; // try to swap in the visited list on NULL + } + // Do a pop. We use a reader lock to prevent + // a segment to be decommitted while a read is still pending, and a tagged + // pointer to prevent A-B-A link corruption. + mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted + mi_tagged_segment_t next = 0; + do { + ts = mi_atomic_read_relaxed(&abandoned); + segment = mi_tagged_segment_ptr(ts); + if (segment != NULL) { + next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads segment so should not be decommitted + } + } while (segment != NULL && !mi_atomic_cas_weak(&abandoned, next, ts)); + mi_atomic_decrement(&abandoned_readers); // release reader lock + if (segment != NULL) { + segment->abandoned_next = NULL; + } + return segment; +} + + static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); mi_assert_internal(segment->abandoned_next == NULL); - mi_assert_expensive(mi_segment_is_valid(segment,tld)); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); // remove the segment from the free page queue if needed - mi_reset_delayed(tld); - mi_pages_reset_remove_all_in_segment(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld); + mi_reset_delayed(tld); + mi_pages_reset_remove_all_in_segment(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld); mi_segment_remove_from_free_queue(segment, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); @@ -855,8 +940,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segments_track_size(-((long)segment->segment_size), tld); segment->thread_id = 0; segment->abandoned_next = NULL; - mi_segments_prepend_abandoned(segment); // prepend one-element list - mi_atomic_increment(&abandoned_count); // keep approximate count + mi_abandoned_push(segment); } void 
_mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { @@ -865,107 +949,164 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(mi_page_heap(page) == NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(!mi_pages_reset_contains(page, tld)); - mi_assert_expensive(mi_segment_is_valid(segment,tld)); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); mi_assert_internal(segment->abandoned <= segment->used); if (segment->used == segment->abandoned) { // all pages are abandoned, abandon the entire segment - mi_segment_abandon(segment,tld); + mi_segment_abandon(segment, tld); } } -bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) { - // To avoid the A-B-A problem, grab the entire list atomically - mi_segment_t* segment = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); // pre-read to avoid expensive atomic operations - if (segment == NULL) return false; - segment = mi_atomic_exchange_ptr(mi_segment_t, &abandoned, NULL); - if (segment == NULL) return false; - // we got a non-empty list - if (!try_all) { - // take at most 1/8th of the list and append the rest back to the abandoned list again - // this is O(n) but simplifies the code a lot (as we don't have an A-B-A problem) - // and probably ok since the length will tend to be not too large. 
- uintptr_t atmost = mi_atomic_read(&abandoned_count)/8; // at most 1/8th of all outstanding (estimated) - if (atmost < 8) atmost = 8; // but at least 8 - - // find the split point - mi_segment_t* last = segment; - while (last->abandoned_next != NULL && atmost > 0) { - last = last->abandoned_next; - atmost--; - } - // split the list and push back the remaining segments - mi_segment_t* anext = last->abandoned_next; - last->abandoned_next = NULL; - mi_segments_prepend_abandoned(anext); - } - - // reclaim all segments that we kept - while(segment != NULL) { - mi_segment_t* const anext = segment->abandoned_next; // save the next segment - - // got it. - mi_atomic_decrement(&abandoned_count); - segment->thread_id = _mi_thread_id(); - segment->abandoned_next = NULL; - mi_segments_track_size((long)segment->segment_size,tld); - mi_assert_internal(segment->next == NULL && segment->prev == NULL); - mi_assert_expensive(mi_segment_is_valid(segment,tld)); - _mi_stat_decrease(&tld->stats->segments_abandoned,1); - - // add its abandoned pages to the current thread - mi_assert(segment->abandoned == segment->used); - for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (page->segment_in_use) { - mi_assert_internal(!page->is_reset); - mi_assert_internal(page->is_committed); - mi_assert_internal(mi_page_not_in_queue(page, tld)); - mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); - mi_assert_internal(mi_page_heap(page) == NULL); +static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, mi_segments_tld_t* tld) +{ + mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); + bool has_page = false; + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->segment_in_use) { + mi_assert_internal(!page->is_reset); + mi_assert_internal(page->is_committed); + mi_assert_internal(mi_page_not_in_queue(page, tld)); + 
mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); + mi_assert_internal(page->next == NULL); + // ensure used count is up to date and collect potential concurrent frees + _mi_page_free_collect(page, false); + if (mi_page_all_free(page)) { + // if everything free already, clear the page directly segment->abandoned--; - mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); - // set the heap again and allow delayed free again - mi_page_set_heap(page, heap); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) - _mi_page_free_collect(page, false); // ensure used count is up to date - if (mi_page_all_free(page)) { - // if everything free already, clear the page directly - mi_segment_page_clear(segment,page,tld); - } - else { - // otherwise reclaim it into the heap - _mi_page_reclaim(heap,page); - } + mi_segment_page_clear(segment, page, false, tld); // no reset allowed (as the segment is still abandoned) + has_page = true; + } + else if (page->xblock_size == block_size && page->used < page->reserved) { + // a page has available free blocks of the right size + has_page = true; } } - mi_assert(segment->abandoned == 0); - if (segment->used == 0) { // due to page_clear's - mi_segment_free(segment,false,tld); + } + return has_page; +} + +#define MI_RECLAIMED ((mi_segment_t*)1) + +static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, mi_segments_tld_t* tld) { + UNUSED_RELEASE(page_kind); + mi_assert_internal(page_kind == segment->page_kind); + mi_assert_internal(segment->abandoned_next == NULL); + bool right_page_reclaimed = false; + + segment->thread_id = _mi_thread_id(); + mi_segments_track_size((long)segment->segment_size, tld); + mi_assert_internal(segment->next == NULL && segment->prev == NULL); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); + 
_mi_stat_decrease(&tld->stats->segments_abandoned, 1); + + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->segment_in_use) { + mi_assert_internal(!page->is_reset); + mi_assert_internal(page->is_committed); + mi_assert_internal(mi_page_not_in_queue(page, tld)); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); + segment->abandoned--; + mi_assert(page->next == NULL); + _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + // set the heap again and allow delayed free again + mi_page_set_heap(page, heap); + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) + mi_assert_internal(!mi_page_all_free(page)); + // TODO: should we not collect again given that we just collected? + _mi_page_free_collect(page, false); // ensure used count is up to date + if (mi_page_all_free(page)) { + // if everything free already, clear the page directly + mi_segment_page_clear(segment, page, true, tld); // reset is ok now + } + else { + // otherwise reclaim it into the heap + _mi_page_reclaim(heap, page); + if (block_size == page->xblock_size) { + right_page_reclaimed = true; + } + } + } + } + mi_assert_internal(segment->abandoned == 0); + if (right_page_reclaimed) { + // add the segment's free pages to the free small segment queue + if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { + mi_segment_insert_in_free_queue(segment, tld); + } + // and return reclaimed: at the page allocation the page is already in the queue now + return MI_RECLAIMED; + } + else { + // otherwise return the segment as it will contain some free pages + mi_assert_internal(segment->used < segment->capacity); + return segment; + } +} + +static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, mi_segments_tld_t* tld) +{ + mi_segment_t* segment; + int max_tries = 8; // limit 
the work to bound allocation times + while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { + bool has_page = mi_segment_pages_collect(segment,block_size,tld); // try to free up pages (due to concurrent frees) + if (has_page && segment->page_kind == page_kind) { + // found a free page of the right kind, or page of the right block_size with free space + return mi_segment_reclaim(segment, heap, block_size, page_kind, tld); + } + else if (segment->used==0) { + // free the segment to make it available to other threads + mi_segment_os_free(segment, segment->segment_size, tld); } else { - // add its free pages to the the current thread free small segment queue - if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { - mi_segment_insert_in_free_queue(segment,tld); - } + // push on the visited list so it gets not looked at too quickly again + mi_abandoned_visited_push(segment); } - - // go on - segment = anext; } - - return true; + return NULL; } +static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +{ + mi_assert_internal(page_kind <= MI_PAGE_LARGE); + mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); + mi_segment_t* segment = mi_segment_try_reclaim(heap, block_size, page_kind, tld); + if (segment == MI_RECLAIMED) { + return NULL; // pretend out-of-memory as the page will be in the page queue + } + else if (segment == NULL) { + return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld); + } + else { + return segment; + } +} + +void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { + mi_segment_t* segment; + while ((segment = mi_abandoned_pop()) != NULL) { + mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, segment->page_kind, tld); + mi_assert_internal(res != NULL); + if (res != MI_RECLAIMED && res != NULL) { + mi_assert_internal(res == segment); + if (segment->page_kind <= 
MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { + mi_segment_insert_in_free_queue(segment, tld); + } + } + } +} /* ----------------------------------------------------------- Small page allocation ----------------------------------------------------------- */ - static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_assert_expensive(mi_segment_is_valid(segment, tld)); @@ -986,13 +1127,15 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl return mi_segment_find_free(segment, tld); } -static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { +static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { // find an available segment the segment free queue mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld); if (mi_segment_queue_is_empty(free_queue)) { // possibly allocate a fresh segment - mi_segment_t* segment = mi_segment_alloc(0, kind, page_shift, tld, os_tld); - if (segment == NULL) return NULL; // return NULL if out-of-memory + mi_segment_t* segment = mi_segment_reclaim_or_alloc(heap, block_size, kind, page_shift, tld, os_tld); + if (segment == NULL) return NULL; // return NULL if out-of-memory (or reclaimed) + mi_assert_internal(segment->page_kind==kind); + mi_assert_internal(segment->used < segment->capacity); mi_segment_enqueue(free_queue, segment); } mi_assert_internal(free_queue->first != NULL); @@ -1005,20 +1148,20 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, return page; } -static mi_page_t* mi_segment_small_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - return mi_segment_page_alloc(MI_PAGE_SMALL,MI_SMALL_PAGE_SHIFT,tld,os_tld); +static mi_page_t* mi_segment_small_page_alloc(mi_heap_t* heap, 
size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + return mi_segment_page_alloc(heap, block_size, MI_PAGE_SMALL,MI_SMALL_PAGE_SHIFT,tld,os_tld); } -static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - return mi_segment_page_alloc(MI_PAGE_MEDIUM, MI_MEDIUM_PAGE_SHIFT, tld, os_tld); +static mi_page_t* mi_segment_medium_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + return mi_segment_page_alloc(heap, block_size, MI_PAGE_MEDIUM, MI_MEDIUM_PAGE_SHIFT, tld, os_tld); } /* ----------------------------------------------------------- large page allocation ----------------------------------------------------------- */ -static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); +static mi_page_t* mi_segment_large_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + mi_segment_t* segment = mi_segment_reclaim_or_alloc(heap,block_size,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); if (segment == NULL) return NULL; mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); @@ -1043,16 +1186,16 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld Page allocation and free ----------------------------------------------------------- */ -mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { - page = mi_segment_small_page_alloc(tld,os_tld); + page = mi_segment_small_page_alloc(heap, block_size, tld, os_tld); } else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { - page = mi_segment_medium_page_alloc(tld, os_tld); + page = 
mi_segment_medium_page_alloc(heap, block_size, tld, os_tld); } else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { - page = mi_segment_large_page_alloc(tld, os_tld); + page = mi_segment_large_page_alloc(heap, block_size, tld, os_tld); } else { page = mi_segment_huge_page_alloc(block_size,tld,os_tld); From 58fdcbb0cd6fbe426237f334ba4a7cf8decebf35 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 21:37:14 -0800 Subject: [PATCH 221/293] fix bug in collect where has_page was not set on free pages --- src/options.c | 2 +- src/segment.c | 19 ++++++++++++++----- test/test-stress.c | 29 +++++++++++++++++++++++++---- 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/src/options.c b/src/options.c index 76cdbef0..cb5d4049 100644 --- a/src/options.c +++ b/src/options.c @@ -70,7 +70,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment.c b/src/segment.c index 95ae6d8b..a4b61377 100644 --- a/src/segment.c +++ b/src/segment.c @@ -231,6 +231,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* ----------------------------------------------------------- */ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { + mi_assert_internal(page->is_committed); if (!mi_option_is_enabled(mi_option_page_reset)) return; if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; size_t psize; @@ -330,7 +331,7 @@ static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool for if (segment->mem_is_fixed) return; // never reset in huge OS pages for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use && !page->is_reset) { + if (!page->segment_in_use && page->is_committed && !page->is_reset) { mi_pages_reset_remove(page, tld); if (force_reset) { mi_page_reset(segment, page, 0, tld); @@ -544,8 +545,12 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache == NULL); - mi_assert_internal(tld->pages_reset.first == NULL); - mi_assert_internal(tld->pages_reset.last == NULL); +#if MI_DEBUG>=2 + if (!_mi_is_main_thread()) { + mi_assert_internal(tld->pages_reset.first == NULL); + mi_assert_internal(tld->pages_reset.last == NULL); + } +#endif } @@ -979,7 +984,7 @@ static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, m // if everything free already, clear the page directly segment->abandoned--; _mi_stat_decrease(&tld->stats->pages_abandoned, 1); - mi_segment_page_clear(segment, page, false, tld); // no reset allowed (as the segment is still abandoned) + mi_segment_page_clear(segment, page, false, tld); // no (delayed) 
reset allowed (as the segment is still abandoned) has_page = true; } else if (page->xblock_size == block_size && page->used < page->reserved) { @@ -987,6 +992,9 @@ static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, m has_page = true; } } + else { + has_page = true; + } } return has_page; } @@ -1046,7 +1054,8 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, } else { // otherwise return the segment as it will contain some free pages - mi_assert_internal(segment->used < segment->capacity); + // (except for abandoned_reclaim_all which uses a block_size of zero) + mi_assert_internal(segment->used < segment->capacity || block_size == 0); return segment; } } diff --git a/test/test-stress.c b/test/test-stress.c index 67ec9f05..7869cc8c 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -57,6 +57,7 @@ const uintptr_t cookie = 0xbf58476d1ce4e5b9UL; const uintptr_t cookie = 0x1ce4e5b9UL; #endif +static uintptr_t ticks(void); static void* atomic_exchange_ptr(volatile void** p, void* newval); typedef uintptr_t* random_t; @@ -121,7 +122,7 @@ static void free_items(void* p) { static void stress(intptr_t tid) { //bench_start_thread(); - uintptr_t r = tid * 43; + uintptr_t r = (tid * 43)^ticks(); const size_t max_item_shift = 5; // 128 const size_t max_item_retained_shift = max_item_shift + 2; size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more @@ -194,9 +195,9 @@ static void test_stress(void) { } static void leak(intptr_t tid) { - uintptr_t r = 43*tid; + uintptr_t r = (43*tid)^ticks(); void* p = alloc_items(pick(&r)%128, &r); - if (chance(10, &r)) { + if (chance(50, &r)) { intptr_t i = (pick(&r) % TRANSFERS); void* q = atomic_exchange_ptr(&transfer[i], p); free_items(q); @@ -259,7 +260,13 @@ static void (*thread_entry_fun)(intptr_t) = &stress; #include -static DWORD WINAPI thread_entry(LPVOID param) { +static uintptr_t ticks(void) { + LARGE_INTEGER t; + QueryPerformanceCounter(&t); + 
return (uintptr_t)t.QuadPart; +} + +static DWORD WINAPI thread_entry(LPVOID param) { thread_entry_fun((intptr_t)param); return 0; } @@ -323,4 +330,18 @@ static void* atomic_exchange_ptr(volatile void** p, void* newval) { } #endif +#include +#ifdef CLOCK_REALTIME +uintptr_t ticks(void) { + struct timespec t; + clock_gettime(CLOCK_REALTIME, &t); + return (uintptr_t)t.tv_sec * 1000) + ((uintptr_t)t.tv_nsec / 1000000); +} +#else +// low resolution timer +uintptr_t _mi_clock_now(void) { + return ((uintptr_t)clock() / ((uintptr_t)CLOCKS_PER_SEC / 1000)); +} +#endif + #endif From e68293741edb043e9e8bdbfa06896d5c187024f7 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 21:44:32 -0800 Subject: [PATCH 222/293] fix assertion, add check for page committed before doing reset --- src/segment.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/segment.c b/src/segment.c index 85e8817b..3914d770 100644 --- a/src/segment.c +++ b/src/segment.c @@ -231,6 +231,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* ----------------------------------------------------------- */ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { + mi_assert_internal(page->is_committed); if (!mi_option_is_enabled(mi_option_page_reset)) return; if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; size_t psize; @@ -330,7 +331,7 @@ static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool for if (segment->mem_is_fixed) return; // never reset in huge OS pages for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use && !page->is_reset) { + if (!page->segment_in_use && page->is_committed && !page->is_reset) { mi_pages_reset_remove(page, tld); if (force_reset) { mi_page_reset(segment, page, 0, tld); @@ -544,8 +545,12 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } 
mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache == NULL); - mi_assert_internal(tld->pages_reset.first == NULL); - mi_assert_internal(tld->pages_reset.last == NULL); +#if MI_DEBUG>=2 + if (!_mi_is_main_thread()) { + mi_assert_internal(tld->pages_reset.first == NULL); + mi_assert_internal(tld->pages_reset.last == NULL); + } +#endif } From 8cf4882a85f9ab64c77bc93898b71aedf27a1dbb Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 24 Jan 2020 10:38:25 -0800 Subject: [PATCH 223/293] fix linux build --- test/test-stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-stress.c b/test/test-stress.c index 6e2b20e9..40ddbd47 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -335,7 +335,7 @@ static void* atomic_exchange_ptr(volatile void** p, void* newval) { uintptr_t ticks(void) { struct timespec t; clock_gettime(CLOCK_REALTIME, &t); - return (uintptr_t)t.tv_sec * 1000) + ((uintptr_t)t.tv_nsec / 1000000); + return ((uintptr_t)t.tv_sec * 1000) + ((uintptr_t)t.tv_nsec / 1000000); } #else // low resolution timer From 28c14d99c317063fc6a869a9261f125a30106fe5 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 24 Jan 2020 11:03:12 -0800 Subject: [PATCH 224/293] clean up comments --- src/memory.c | 59 +++++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/src/memory.c b/src/memory.c index a442a35d..287de414 100644 --- a/src/memory.c +++ b/src/memory.c @@ -57,7 +57,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo // Constants #if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 48KiB for the region map +#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 64KiB for the region map #elif (MI_INTPTR_SIZE==4) #define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map #else @@ -72,14 +72,13 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo #define MI_REGION_MAX_OBJ_BLOCKS 
(MI_REGION_MAX_BLOCKS/4) // 64MiB #define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) -// Region info is a pointer to the memory region and two bits for -// its flags: is_large, and is_committed. +// Region info typedef union mi_region_info_u { - uintptr_t value; + uintptr_t value; struct { - bool valid; - bool is_large; - short numa_node; + bool valid; // initialized? + bool is_large; // allocated in fixed large/huge OS pages + short numa_node; // the associated NUMA node (where -1 means no associated node) } x; } mi_region_info_t; @@ -87,12 +86,12 @@ typedef union mi_region_info_u { // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. typedef struct mem_region_s { - volatile _Atomic(uintptr_t) info; // is_large, and associated numa node + 1 (so 0 is no association) - volatile _Atomic(void*) start; // start of the memory area (and flags) + volatile _Atomic(uintptr_t) info; // mi_region_info_t.value + volatile _Atomic(void*) start; // start of the memory area mi_bitmap_field_t in_use; // bit per in-use block mi_bitmap_field_t dirty; // track if non-zero per block - mi_bitmap_field_t commit; // track if committed per block (if `!info.is_committed)) - mi_bitmap_field_t reset; // track reset per block + mi_bitmap_field_t commit; // track if committed per block + mi_bitmap_field_t reset; // track if reset per block volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena- } mem_region_t; @@ -239,11 +238,13 @@ static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, { // try all regions for a free slot const size_t count = mi_atomic_read(®ions_count); - size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? + size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? 
Starting at 0 seems to increase latency though for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around mem_region_t* r = ®ions[idx]; + // if this region suits our demand (numa node matches, large OS page matches) if (mi_region_is_suitable(r, numa_node, allow_large)) { + // then try to atomically claim a segment(s) in this region if (mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { tld->region_idx = idx; // remember the last found position *region = r; @@ -263,15 +264,15 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); // try to claim in existing regions if (!mi_region_try_claim(numa_node, blocks, *is_large, ®ion, &bit_idx, tld)) { - // otherwise try to allocate a fresh region + // otherwise try to allocate a fresh region and claim in there if (!mi_region_try_alloc_os(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { // out of regions or memory return NULL; } } - - // found a region and claimed `blocks` at `bit_idx` + // ------------------------------------------------ + // found a region and claimed `blocks` at `bit_idx`, initialize them now mi_assert_internal(region != NULL); mi_assert_internal(mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); @@ -346,25 +347,27 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l size = _mi_align_up(size, _mi_os_page_size()); // allocate from regions if possible + void* p = NULL; size_t arena_memid; const size_t blocks = mi_region_block_count(size); if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) { - void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld); - mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0); - if (p != NULL) { - #if (MI_DEBUG>=2) - if (*commit) { ((uint8_t*)p)[0] = 0; } - #endif - return p; + p = mi_region_try_alloc(blocks, commit, large, 
is_zero, memid, tld); + if (p == NULL) { + _mi_warning_message("unable to allocate from region: size %zu\n", size); } - _mi_warning_message("unable to allocate from region: size %zu\n", size); + } + if (p == NULL) { + // and otherwise fall back to the OS + p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld); + *memid = mi_memid_create_from_arena(arena_memid); } - // and otherwise fall back to the OS - void* p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld); - *memid = mi_memid_create_from_arena(arena_memid); - mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); - if (p != NULL && *commit) { ((uint8_t*)p)[0] = 0; } + if (p != NULL) { + mi_assert_internal((uintptr_t)p % alignment == 0); +#if (MI_DEBUG>=2) + if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed +#endif + } return p; } From 4ae51096ecdee7f1d4b309f38f6c272a8f61d473 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 24 Jan 2020 15:45:03 -0800 Subject: [PATCH 225/293] add warning on region exhaustion --- src/memory.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index 287de414..96047b79 100644 --- a/src/memory.c +++ b/src/memory.c @@ -92,7 +92,8 @@ typedef struct mem_region_s { mi_bitmap_field_t dirty; // track if non-zero per block mi_bitmap_field_t commit; // track if committed per block mi_bitmap_field_t reset; // track if reset per block - volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena- + volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena + uintptr_t padding; // round to 8 fields } mem_region_t; // The region map @@ -187,6 +188,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, if (idx >= MI_REGION_MAX) { mi_atomic_decrement(®ions_count); _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); + _mi_warning_message("maximum regions used: %zu 
GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB)); return false; } From e070eba112f80b4b4c007cc8cd6696463bf1884b Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 24 Jan 2020 16:30:52 -0800 Subject: [PATCH 226/293] fix tagged null encoding, search segment cache before reclaim --- src/options.c | 2 +- src/segment.c | 149 +++++++++++++++++++++++++++++++-------------- test/test-stress.c | 16 ++--- 3 files changed, 111 insertions(+), 56 deletions(-) diff --git a/src/options.c b/src/options.c index cb5d4049..af051aa2 100644 --- a/src/options.c +++ b/src/options.c @@ -67,7 +67,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed diff --git a/src/segment.c b/src/segment.c index a4b61377..7aced87d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -15,27 +15,25 @@ terms of the MIT license. A copy of the license can be found in the file static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); -/* ----------------------------------------------------------- +/* -------------------------------------------------------------------------------- Segment allocation - We allocate pages inside big OS allocated "segments" - (4mb on 64-bit). 
This is to avoid splitting VMA's on Linux - and reduce fragmentation on other OS's. Each thread - owns its own segments. + We allocate pages inside bigger "segments" (4mb on 64-bit). This is to avoid + splitting VMA's on Linux and reduce fragmentation on other OS's. + Each thread owns its own segments. Currently we have: - small pages (64kb), 64 in one segment - medium pages (512kb), 8 in one segment - large pages (4mb), 1 in one segment - - huge blocks > MI_LARGE_OBJ_SIZE_MAX (512kb) are directly allocated by the OS + - huge blocks > MI_LARGE_OBJ_SIZE_MAX become large segment with 1 page - In any case the memory for a segment is virtual and only - committed on demand (i.e. we are careful to not touch the memory - until we actually allocate a block there) + In any case the memory for a segment is virtual and usually committed on demand. + (i.e. we are careful to not touch the memory until we actually allocate a block there) If a thread ends, it "abandons" pages with used blocks and there is an abandoned segment list whose segments can be reclaimed by still running threads, much like work-stealing. ------------------------------------------------------------ */ +-------------------------------------------------------------------------------- */ /* ----------------------------------------------------------- @@ -559,8 +557,11 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { ----------------------------------------------------------- */ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . 
-static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + // the segment parameter is non-null if it came from our cache + mi_assert_internal(segment==NULL || (required==0 && page_kind <= MI_PAGE_LARGE)); + // calculate needed sizes first size_t capacity; if (page_kind == MI_PAGE_HUGE) { @@ -587,8 +588,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, bool is_zero = false; // Try to get it from our thread local cache first - mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { + // came from cache + mi_assert_internal(segment->segment_size == segment_size); if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { pages_still_good = true; } @@ -674,6 +676,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, return segment; } +static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + return mi_segment_init(NULL, required, page_kind, page_shift, tld, os_tld); +} static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { UNUSED(force); @@ -814,15 +819,23 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) /* ----------------------------------------------------------- - Abandonment +Abandonment + +When threads terminate, they can leave segments with +live blocks (reached through other threads). Such segments +are "abandoned" and will be reclaimed by other threads to +reuse their pages and/or free them eventually + +We maintain a global list of abandoned segments that are +reclaimed on demand. 
Since this is shared among threads +the implementation needs to avoid the A-B-A problem on +popping abandoned segments which is why tagged pointers are +used. ----------------------------------------------------------- */ -// When threads terminate, they can leave segments with -// live blocks (reached through other threads). Such segments -// are "abandoned" and will be reclaimed by other threads to -// reuse their pages and/or free them eventually - - +// Use the bottom 20-bits (on 64-bit) of the aligned segment +// pointers to put in a tag that increments on update to avoid +// the A-B-A problem. #define MI_TAGGED_MASK MI_SEGMENT_MASK typedef uintptr_t mi_tagged_segment_t; @@ -832,15 +845,23 @@ static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) { static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) { mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0); - if (segment==NULL) return 0; // no need to tag NULL uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK; return ((uintptr_t)segment | tag); } +// This is a list of visited abandoned pages that were full at the time. +// this list migrates to `abandoned` when that becomes NULL. static volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL + +// The abandoned page list. static volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL + +// We also maintain a count of current readers of the abandoned list +// in order to prevent resetting/decommitting segment memory if it might +// still be read. 
static volatile _Atomic(uintptr_t) abandoned_readers; // = 0 +// Push on the visited list static void mi_abandoned_visited_push(mi_segment_t* segment) { mi_assert_internal(segment->thread_id == 0); mi_assert_internal(segment->abandoned_next == NULL); @@ -853,14 +874,23 @@ static void mi_abandoned_visited_push(mi_segment_t* segment) { } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, segment, anext)); } -static bool mi_abandoned_visited_revisit(void) { +// Move the visited list to the abandoned list. +static bool mi_abandoned_visited_revisit(void) +{ + // quick check if the visited list is empty + if (mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned_visited)==NULL) return false; + // grab the whole visited list mi_segment_t* first = mi_atomic_exchange_ptr(mi_segment_t, &abandoned_visited, NULL); if (first == NULL) return false; // first try to swap directly if the abandoned list happens to be NULL - mi_tagged_segment_t afirst = mi_tagged_segment(first,0); - if (mi_atomic_cas_weak(&abandoned, afirst, 0)) return true; + const mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); + mi_tagged_segment_t afirst; + if (mi_tagged_segment_ptr(ts)==NULL) { + afirst = mi_tagged_segment(first, ts); + if (mi_atomic_cas_strong(&abandoned, afirst, ts)) return true; + } // find the last element of the visited list: O(n) mi_segment_t* last = first; @@ -879,6 +909,7 @@ static bool mi_abandoned_visited_revisit(void) { return true; } +// Push on the abandoned list. 
static void mi_abandoned_push(mi_segment_t* segment) { mi_assert_internal(segment->thread_id == 0); mi_assert_internal(segment->abandoned_next == NULL); @@ -893,6 +924,7 @@ static void mi_abandoned_push(mi_segment_t* segment) { } while (!mi_atomic_cas_weak(&abandoned, next, ts)); } +// Wait until there are no more pending reads on segments that used to be in the abandoned list void _mi_abandoned_await_readers(void) { uintptr_t n; do { @@ -901,23 +933,28 @@ void _mi_abandoned_await_readers(void) { } while (n != 0); } +// Pop from the abandoned list static mi_segment_t* mi_abandoned_pop(void) { mi_segment_t* segment; + // Check efficiently if it is empty (or if the visited list needs to be moved) mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); segment = mi_tagged_segment_ptr(ts); - if (segment == NULL) { - if (!mi_abandoned_visited_revisit()) return NULL; // try to swap in the visited list on NULL + if (mi_likely(segment == NULL)) { + if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL + return NULL; + } } - // Do a pop. We use a reader lock to prevent - // a segment to be decommitted while a read is still pending, and a tagged - // pointer to prevent A-B-A link corruption. + + // Do a pop. We use a reader count to prevent + // a segment to be decommitted while a read is still pending, + // and a tagged pointer to prevent A-B-A link corruption. 
mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted mi_tagged_segment_t next = 0; do { ts = mi_atomic_read_relaxed(&abandoned); segment = mi_tagged_segment_ptr(ts); if (segment != NULL) { - next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads segment so should not be decommitted + next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted } } while (segment != NULL && !mi_atomic_cas_weak(&abandoned, next, ts)); mi_atomic_decrement(&abandoned_readers); // release reader lock @@ -927,6 +964,9 @@ static mi_segment_t* mi_abandoned_pop(void) { return segment; } +/* ----------------------------------------------------------- + Abandon segment/page +----------------------------------------------------------- */ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); @@ -945,7 +985,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segments_track_size(-((long)segment->segment_size), tld); segment->thread_id = 0; segment->abandoned_next = NULL; - mi_abandoned_push(segment); + mi_abandoned_push(segment); } void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { @@ -964,6 +1004,9 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { } } +/* ----------------------------------------------------------- + Reclaim abandoned pages +----------------------------------------------------------- */ static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, mi_segments_tld_t* tld) { @@ -1082,22 +1125,6 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, return NULL; } -static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) -{ - 
mi_assert_internal(page_kind <= MI_PAGE_LARGE); - mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); - mi_segment_t* segment = mi_segment_try_reclaim(heap, block_size, page_kind, tld); - if (segment == MI_RECLAIMED) { - return NULL; // pretend out-of-memory as the page will be in the page queue - } - else if (segment == NULL) { - return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld); - } - else { - return segment; - } -} - void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { mi_segment_t* segment; while ((segment = mi_abandoned_pop()) != NULL) { @@ -1112,6 +1139,34 @@ void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { } } + +/* ----------------------------------------------------------- + Reclaim or allocate +----------------------------------------------------------- */ + +static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +{ + mi_assert_internal(page_kind <= MI_PAGE_LARGE); + mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); + // 1. try to get a segment from our cache + mi_segment_t* segment = mi_segment_cache_pop(MI_SEGMENT_SIZE, tld); + if (segment != NULL) { + mi_segment_init(segment, 0, page_kind, page_shift, tld, os_tld); + return segment; + } + // 2. try to reclaim an abandoned segment + segment = mi_segment_try_reclaim(heap, block_size, page_kind, tld); + if (segment == MI_RECLAIMED) { + return NULL; // pretend out-of-memory as the page will be in the page queue of the heap + } + else if (segment != NULL) { + return segment; // reclaimed a segment with empty pages in it + } + // 3. 
otherwise allocate a fresh segment + return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld); +} + + /* ----------------------------------------------------------- Small page allocation ----------------------------------------------------------- */ @@ -1192,7 +1247,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld } /* ----------------------------------------------------------- - Page allocation and free + Page allocation ----------------------------------------------------------- */ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { diff --git a/test/test-stress.c b/test/test-stress.c index 40ddbd47..72e4e853 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -32,8 +32,10 @@ static int ITER = 50; // N full iterations destructing and re-creating a // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor +#define STRESS // undefine for leak test + static bool allow_large_objects = true; // allow very large objects? -static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? +static size_t use_one_size = 1; // use single object size of `N * sizeof(uintptr_t)`? 
#ifdef USE_STD_MALLOC @@ -189,7 +191,7 @@ static void test_stress(void) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif } } @@ -209,8 +211,7 @@ static void test_leak(void) { run_os_threads(THREADS, &leak); mi_collect(false); #ifndef NDEBUG - //if ((n + 1) % 10 == 0) - { printf("- iterations left: %3d\n", ITER - (n + 1)); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif } } @@ -240,12 +241,11 @@ int main(int argc, char** argv) { // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. mi_stats_reset(); - if (true) { +#ifdef STRESS test_stress(); - } - else { +#else test_leak(); - } +#endif mi_collect(true); mi_stats_print(NULL); From b31bc52618658bdb12cb316c13580ecd82bfd8d9 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 24 Jan 2020 19:02:13 -0800 Subject: [PATCH 227/293] add cache alignment directives for contended variables --- ide/vs2019/mimalloc.vcxproj | 4 ++-- include/mimalloc-internal.h | 4 ++++ src/arena.c | 6 +++--- src/os.c | 4 ++-- src/segment.c | 26 +++++++++++++------------- test/test-stress.c | 6 +++--- 6 files changed, 27 insertions(+), 23 deletions(-) diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 037e380d..a98e78ba 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -100,7 +100,7 @@ MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp false - stdcpp17 + Default @@ -119,7 +119,7 @@ MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp false - stdcpp17 + Default diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 3335414a..902d2fdf 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -20,16 +20,20 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_trace_message(...) 
#endif +#define MI_CACHE_LINE 64 #if defined(_MSC_VER) #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) +#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) #elif (defined(__GNUC__) && (__GNUC__>=3)) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #define mi_decl_thread __thread +#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) #else #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) +#define mi_decl_cache_align #endif diff --git a/src/arena.c b/src/arena.c index acb92243..ac599f32 100644 --- a/src/arena.c +++ b/src/arena.c @@ -54,7 +54,7 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) // A memory arena descriptor -typedef struct mi_arena_s { +typedef mi_decl_cache_align struct mi_arena_s { _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) @@ -70,8 +70,8 @@ typedef struct mi_arena_s { // The available arenas -static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; -static _Atomic(uintptr_t) mi_arena_count; // = 0 +static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; +static mi_decl_cache_align _Atomic(uintptr_t) mi_arena_count; // = 0 /* ----------------------------------------------------------- diff --git a/src/os.c b/src/os.c index 6e8c12d8..b8dfaa70 100644 --- a/src/os.c +++ b/src/os.c @@ -397,7 +397,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro // On 64-bit systems, we can do efficient aligned allocation by using // the 4TiB to 30TiB area to allocate them. 
#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED))) -static volatile _Atomic(uintptr_t) aligned_base; +static volatile mi_decl_cache_align _Atomic(uintptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { @@ -905,7 +905,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) #if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge OS pages -static _Atomic(uintptr_t) mi_huge_start; // = 0 +static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0 // Claim an aligned address range for huge pages static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { diff --git a/src/segment.c b/src/segment.c index 7aced87d..a26ac449 100644 --- a/src/segment.c +++ b/src/segment.c @@ -365,9 +365,6 @@ static void mi_reset_delayed(mi_segments_tld_t* tld) { } - - - /* ----------------------------------------------------------- Segment size calculations ----------------------------------------------------------- */ @@ -829,13 +826,15 @@ reuse their pages and/or free them eventually We maintain a global list of abandoned segments that are reclaimed on demand. Since this is shared among threads the implementation needs to avoid the A-B-A problem on -popping abandoned segments which is why tagged pointers are -used. +popping abandoned segments: +We use tagged pointers to avoid accidentially identifying +reused segments, much like stamped references in Java. +Secondly, we maintain a reader counter to avoid resetting +or decommitting segments that have a pending read operation. ----------------------------------------------------------- */ -// Use the bottom 20-bits (on 64-bit) of the aligned segment -// pointers to put in a tag that increments on update to avoid -// the A-B-A problem. 
+// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers +// to put in a tag that increments on update to avoid the A-B-A problem. #define MI_TAGGED_MASK MI_SEGMENT_MASK typedef uintptr_t mi_tagged_segment_t; @@ -850,16 +849,17 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se } // This is a list of visited abandoned pages that were full at the time. -// this list migrates to `abandoned` when that becomes NULL. -static volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL +// this list migrates to `abandoned` when that becomes NULL. The use of +// this list reduces contention and the rate at which segments are visited. +static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL -// The abandoned page list. -static volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL +// The abandoned page list (tagged as it supports pop) +static mi_decl_cache_align volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL // We also maintain a count of current readers of the abandoned list // in order to prevent resetting/decommitting segment memory if it might // still be read. -static volatile _Atomic(uintptr_t) abandoned_readers; // = 0 +static mi_decl_cache_align volatile _Atomic(uintptr_t) abandoned_readers; // = 0 // Push on the visited list static void mi_abandoned_visited_push(mi_segment_t* segment) { diff --git a/test/test-stress.c b/test/test-stress.c index 72e4e853..19f10360 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -32,10 +32,10 @@ static int ITER = 50; // N full iterations destructing and re-creating a // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor -#define STRESS // undefine for leak test +// #define STRESS // undefine for leak test static bool allow_large_objects = true; // allow very large objects? -static size_t use_one_size = 1; // use single object size of `N * sizeof(uintptr_t)`? 
+static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? #ifdef USE_STD_MALLOC @@ -198,7 +198,7 @@ static void test_stress(void) { static void leak(intptr_t tid) { uintptr_t r = (43*tid)^ticks(); - void* p = alloc_items(pick(&r)%128, &r); + void* p = alloc_items(1 /*pick(&r)%128*/, &r); if (chance(50, &r)) { intptr_t i = (pick(&r) % TRANSFERS); void* q = atomic_exchange_ptr(&transfer[i], p); From 47300eeda3e78a909492f67f7c2b77289a7be383 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 24 Jan 2020 20:17:33 -0800 Subject: [PATCH 228/293] avoid memset --- src/init.c | 7 ++++--- src/segment.c | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/init.c b/src/init.c index 18a18f60..366acbf5 100644 --- a/src/init.c +++ b/src/init.c @@ -140,6 +140,7 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL }; Initialization and freeing of the thread local heaps ----------------------------------------------------------- */ +// note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size). typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; @@ -154,12 +155,13 @@ static bool _mi_heap_init(void) { mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); } else { - // use `_mi_os_alloc` to allocate directly from the OS + // use `_mi_os_alloc` to allocate directly from the OS mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t),&_mi_stats_main); // Todo: more efficient allocation? 
if (td == NULL) { _mi_error_message(ENOMEM, "failed to allocate thread local heap memory\n"); return false; } + // OS allocated so already zero initialized mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); @@ -168,8 +170,7 @@ static bool _mi_heap_init(void) { heap->cookie = _mi_heap_random_next(heap) | 1; heap->key[0] = _mi_heap_random_next(heap); heap->key[1] = _mi_heap_random_next(heap); - heap->tld = tld; - memset(tld, 0, sizeof(*tld)); + heap->tld = tld; tld->heap_backing = heap; tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; diff --git a/src/segment.c b/src/segment.c index a26ac449..f6554520 100644 --- a/src/segment.c +++ b/src/segment.c @@ -948,6 +948,7 @@ static mi_segment_t* mi_abandoned_pop(void) { // Do a pop. We use a reader count to prevent // a segment to be decommitted while a read is still pending, // and a tagged pointer to prevent A-B-A link corruption. + // (this is called from `memory.c:_mi_mem_free` for example) mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted mi_tagged_segment_t next = 0; do { From ecece572847f70553f2a2c8f9d754e1f16756986 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 24 Jan 2020 20:20:43 -0800 Subject: [PATCH 229/293] fix bug in committed check in arena allocation --- src/arena.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index acb92243..55747bb1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -107,6 +107,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* size_t idx = mi_atomic_read(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around + // try to atomically claim a range of bits if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { mi_atomic_write(&arena->search_idx, idx); // start search from here next time return 
true; @@ -135,8 +136,8 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n // always committed *commit = true; } - else if (commit) { - // ensure commit now + else if (*commit) { + // arena not committed as a whole, but commit requested: ensure commit now bool any_uncommitted; mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { From 2b667bd3aef92ebda22a660e068798ce31b6eed4 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 25 Jan 2020 14:47:09 +0000 Subject: [PATCH 230/293] enable arc4random abi under apple --- src/random.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/random.c b/src/random.c index c40a96da..6fef2434 100644 --- a/src/random.c +++ b/src/random.c @@ -176,7 +176,7 @@ static bool os_random_buf(void* buf, size_t buf_len) { return true; } */ -#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__DragonFly__) || \ +#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__APPLE__) || defined(__DragonFly__) || \ defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ defined(__wasi__) #include @@ -325,4 +325,4 @@ static void chacha_test(void) chacha_block(&r); mi_assert_internal(array_equals(r.output, r_out, 16)); } -*/ \ No newline at end of file +*/ From 5e32d00aab55449acfd2658256a7d6ddb1d1f446 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 12:26:08 -0800 Subject: [PATCH 231/293] add visit count to abandoned to limit list length --- include/mimalloc-types.h | 6 +++-- src/segment.c | 57 +++++++++++++++++++++++++++------------- test/test-stress.c | 7 ++--- 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 0c6dc666..48d86a25 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -241,10 +241,12 @@ typedef struct mi_segment_s { bool mem_is_committed; // `true` if the whole segment is eagerly 
committed // segment fields - struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` + struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` struct mi_segment_s* prev; struct mi_segment_s* abandoned_next; - size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) + size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) + size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) + size_t used; // count of pages in use (`used <= capacity`) size_t capacity; // count of available pages (`#free + used`) size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` diff --git a/src/segment.c b/src/segment.c index f6554520..715d632a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -831,6 +831,14 @@ We use tagged pointers to avoid accidentially identifying reused segments, much like stamped references in Java. Secondly, we maintain a reader counter to avoid resetting or decommitting segments that have a pending read operation. + +Note: the current implementation is one possible design; +another way might be to keep track of abandoned segments +in the regions. This would have the advantage of keeping +all concurrent code in one place and not needing to deal +with ABA issues. The drawback is that it is unclear how to +scan abandoned segments efficiently in that case as they +would be spread among all other segments in the regions. 
----------------------------------------------------------- */ // Use the bottom 20-bits (on 64-bit) of the aligned segment pointers @@ -986,6 +994,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segments_track_size(-((long)segment->segment_size), tld); segment->thread_id = 0; segment->abandoned_next = NULL; + segment->abandoned_visits = 0; mi_abandoned_push(segment); } @@ -1009,6 +1018,7 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { Reclaim abandoned pages ----------------------------------------------------------- */ +// Possibly clear pages and check if free space is available static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, mi_segments_tld_t* tld) { mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); @@ -1045,13 +1055,13 @@ static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, m #define MI_RECLAIMED ((mi_segment_t*)1) -static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, mi_segments_tld_t* tld) { - UNUSED_RELEASE(page_kind); - mi_assert_internal(page_kind == segment->page_kind); +// Reclaim a segment +static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld) { mi_assert_internal(segment->abandoned_next == NULL); bool right_page_reclaimed = false; segment->thread_id = _mi_thread_id(); + segment->abandoned_visits = 0; mi_segments_track_size((long)segment->segment_size, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_expensive(mi_segment_is_valid(segment, tld)); @@ -1104,20 +1114,45 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, } } +// Reclaim a segment without returning it +static void mi_segment_reclaim_force(mi_segment_t* segment, mi_heap_t* heap, mi_segments_tld_t* tld) { + mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, 
tld); + mi_assert_internal(res != MI_RECLAIMED); // due to block_size == 0 + if (res!=MI_RECLAIMED && res != NULL) { + mi_assert_internal(res == segment); + if (res->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(res)) { + mi_segment_insert_in_free_queue(res, tld); + } + } +} + +void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { + mi_segment_t* segment; + while ((segment = mi_abandoned_pop()) != NULL) { + mi_segment_reclaim_force(segment, heap, tld); + } +} + + static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, mi_segments_tld_t* tld) { mi_segment_t* segment; int max_tries = 8; // limit the work to bound allocation times while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { + segment->abandoned_visits++; bool has_page = mi_segment_pages_collect(segment,block_size,tld); // try to free up pages (due to concurrent frees) if (has_page && segment->page_kind == page_kind) { // found a free page of the right kind, or page of the right block_size with free space - return mi_segment_reclaim(segment, heap, block_size, page_kind, tld); + return mi_segment_reclaim(segment, heap, block_size, tld); } else if (segment->used==0) { // free the segment to make it available to other threads mi_segment_os_free(segment, segment->segment_size, tld); } + else if (segment->abandoned_visits >= 3) { + // always reclaim on 3rd visit to limit the list length + mi_segment_reclaim_force(segment, heap, tld); + } else { // push on the visited list so it gets not looked at too quickly again mi_abandoned_visited_push(segment); @@ -1126,20 +1161,6 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, return NULL; } -void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { - mi_segment_t* segment; - while ((segment = mi_abandoned_pop()) != NULL) { - mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, segment->page_kind, tld); - mi_assert_internal(res 
!= NULL); - if (res != MI_RECLAIMED && res != NULL) { - mi_assert_internal(res == segment); - if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { - mi_segment_insert_in_free_queue(segment, tld); - } - } - } -} - /* ----------------------------------------------------------- Reclaim or allocate diff --git a/test/test-stress.c b/test/test-stress.c index 19f10360..ab4571db 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -32,7 +32,7 @@ static int ITER = 50; // N full iterations destructing and re-creating a // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor -// #define STRESS // undefine for leak test +#define STRESS // undefine for leak test static bool allow_large_objects = true; // allow very large objects? static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? @@ -124,7 +124,7 @@ static void free_items(void* p) { static void stress(intptr_t tid) { //bench_start_thread(); - uintptr_t r = (tid * 43); // ^ ticks(); + uintptr_t r = (tid * 43); // rand(); const size_t max_item_shift = 5; // 128 const size_t max_item_retained_shift = max_item_shift + 2; size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more @@ -180,7 +180,8 @@ static void stress(intptr_t tid) { static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid)); static void test_stress(void) { - uintptr_t r = 43 * 43; + srand(0x7feb352d); + uintptr_t r = rand(); for (int n = 0; n < ITER; n++) { run_os_threads(THREADS, &stress); for (int i = 0; i < TRANSFERS; i++) { From f4630d43a71409f1963b910ffb247e137c42d85c Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 12:49:14 -0800 Subject: [PATCH 232/293] allow reset on large pages; check commit status before reset --- src/segment.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/segment.c b/src/segment.c index 715d632a..2d2263ea 100644 --- a/src/segment.c 
+++ b/src/segment.c @@ -231,7 +231,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { mi_assert_internal(page->is_committed); if (!mi_option_is_enabled(mi_option_page_reset)) return; - if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; + if (segment->mem_is_fixed || page->segment_in_use || !page->is_committed || page->is_reset) return; size_t psize; void* start = mi_segment_raw_page_start(segment, page, &psize); page->is_reset = true; @@ -281,12 +281,12 @@ static bool mi_page_reset_is_expired(mi_page_t* page, mi_msecs_t now) { } static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { - mi_assert_internal(!page->segment_in_use); + mi_assert_internal(!page->segment_in_use || !page->is_committed); mi_assert_internal(mi_page_not_in_queue(page,tld)); mi_assert_expensive(!mi_pages_reset_contains(page, tld)); mi_assert_internal(_mi_page_segment(page)==segment); if (!mi_option_is_enabled(mi_option_page_reset)) return; - if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; + if (segment->mem_is_fixed || page->segment_in_use || !page->is_committed || page->is_reset) return; if (mi_option_get(mi_option_reset_delay) == 0) { // reset immediately? 
@@ -782,7 +782,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a segment->used--; // add to the free page list for reuse/reset - if (allow_reset && segment->page_kind <= MI_PAGE_MEDIUM) { + if (allow_reset) { mi_pages_reset_add(segment, page, tld); } } @@ -1095,7 +1095,10 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, right_page_reclaimed = true; } } - } + } + else if (page->is_committed && !page->is_reset) { // not in-use, and not reset yet + mi_pages_reset_add(segment, page, tld); + } } mi_assert_internal(segment->abandoned == 0); if (right_page_reclaimed) { From 19a0d9dfa0f1ed1145d6943d971511b2a2d1060d Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 12:51:56 -0800 Subject: [PATCH 233/293] clean up stress test --- test/test-stress.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index ab4571db..1b559a59 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -59,7 +59,6 @@ const uintptr_t cookie = 0xbf58476d1ce4e5b9UL; const uintptr_t cookie = 0x1ce4e5b9UL; #endif -static uintptr_t ticks(void); static void* atomic_exchange_ptr(volatile void** p, void* newval); typedef uintptr_t* random_t; @@ -180,7 +179,6 @@ static void stress(intptr_t tid) { static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid)); static void test_stress(void) { - srand(0x7feb352d); uintptr_t r = rand(); for (int n = 0; n < ITER; n++) { run_os_threads(THREADS, &stress); @@ -197,8 +195,9 @@ static void test_stress(void) { } } +#ifndef STRESS static void leak(intptr_t tid) { - uintptr_t r = (43*tid)^ticks(); + uintptr_t r = rand(); void* p = alloc_items(1 /*pick(&r)%128*/, &r); if (chance(50, &r)) { intptr_t i = (pick(&r) % TRANSFERS); @@ -207,7 +206,7 @@ static void leak(intptr_t tid) { } } -static void test_leak(void) { +static void test_leak(void) { for (int n = 0; n < ITER; n++) { 
run_os_threads(THREADS, &leak); mi_collect(false); @@ -216,6 +215,7 @@ static void test_leak(void) { #endif } } +#endif int main(int argc, char** argv) { // > mimalloc-test-stress [THREADS] [SCALE] [ITER] @@ -241,6 +241,7 @@ int main(int argc, char** argv) { //bench_start_program(); // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. + srand(0x7feb352d); mi_stats_reset(); #ifdef STRESS test_stress(); @@ -261,12 +262,6 @@ static void (*thread_entry_fun)(intptr_t) = &stress; #include -static uintptr_t ticks(void) { - LARGE_INTEGER t; - QueryPerformanceCounter(&t); - return (uintptr_t)t.QuadPart; -} - static DWORD WINAPI thread_entry(LPVOID param) { thread_entry_fun((intptr_t)param); return 0; @@ -331,18 +326,4 @@ static void* atomic_exchange_ptr(volatile void** p, void* newval) { } #endif -#include -#ifdef CLOCK_REALTIME -uintptr_t ticks(void) { - struct timespec t; - clock_gettime(CLOCK_REALTIME, &t); - return ((uintptr_t)t.tv_sec * 1000) + ((uintptr_t)t.tv_nsec / 1000000); -} -#else -// low resolution timer -uintptr_t _mi_clock_now(void) { - return ((uintptr_t)clock() / ((uintptr_t)CLOCKS_PER_SEC / 1000)); -} -#endif - #endif From 62b8fb26b11f7b5e496add0cc6c9c1c9da3e0791 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 13:27:47 -0800 Subject: [PATCH 234/293] fix freeing of segments on forced reclaim --- src/{memory.c => region.c} | 0 src/segment.c | 5 ++++- 2 files changed, 4 insertions(+), 1 deletion(-) rename src/{memory.c => region.c} (100%) diff --git a/src/memory.c b/src/region.c similarity index 100% rename from src/memory.c rename to src/region.c diff --git a/src/segment.c b/src/segment.c index 2d2263ea..e536ae59 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1123,7 +1123,10 @@ static void mi_segment_reclaim_force(mi_segment_t* segment, mi_heap_t* heap, mi_ mi_assert_internal(res != MI_RECLAIMED); // due to block_size == 0 if (res!=MI_RECLAIMED && res != NULL) { mi_assert_internal(res == 
segment); - if (res->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(res)) { + if (res->used == 0) { + mi_segment_free(segment, false, tld); + } + else if (res->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(res)) { mi_segment_insert_in_free_queue(res, tld); } } From 7785139201dbac8bc9515d7f5fa148f3e0c7827d Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 13:28:24 -0800 Subject: [PATCH 235/293] fix warning on gcc on attribute ignore in templates --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 21d0affc..7bf8099b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -54,7 +54,7 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) // A memory arena descriptor -typedef mi_decl_cache_align struct mi_arena_s { +typedef struct mi_arena_s { _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) From 4faf412f53ac49ee04584b015c826a7bb1d67177 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 13:28:49 -0800 Subject: [PATCH 236/293] move 'memory.c' to 'region.c' --- CMakeLists.txt | 2 +- ide/vs2017/mimalloc-override.vcxproj | 4 ++-- ide/vs2017/mimalloc-override.vcxproj.filters | 4 ++-- ide/vs2017/mimalloc.vcxproj | 4 ++-- ide/vs2017/mimalloc.vcxproj.filters | 4 ++-- ide/vs2019/mimalloc-override.vcxproj | 4 ++-- ide/vs2019/mimalloc-override.vcxproj.filters | 4 ++-- ide/vs2019/mimalloc.vcxproj | 4 ++-- ide/vs2019/mimalloc.vcxproj.filters | 4 ++-- src/static.c | 2 +- 10 files changed, 18 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 95318a0e..b60e64a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ set(mi_sources src/random.c src/os.c src/arena.c - 
src/memory.c + src/region.c src/segment.c src/page.c src/alloc.c diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 4225a2f9..26c8080b 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -234,7 +234,7 @@ - + @@ -251,4 +251,4 @@ - \ No newline at end of file + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 037fbcbb..02652658 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -61,7 +61,7 @@ Source Files - + Source Files @@ -77,4 +77,4 @@ Source Files - \ No newline at end of file + diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index e08deec4..9d6af0e5 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -220,7 +220,7 @@ - + true @@ -245,4 +245,4 @@ - \ No newline at end of file + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 5fe74aa0..43660519 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -47,7 +47,7 @@ Source Files - + Source Files @@ -80,4 +80,4 @@ Header Files - \ No newline at end of file + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index ac19e321..17b6f4c0 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -237,7 +237,7 @@ - + @@ -254,4 +254,4 @@ - \ No newline at end of file + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index a8c5a5de..83d6f7fe 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -22,7 +22,7 @@ Source Files - + Source Files @@ -78,4 +78,4 @@ {39cb7e38-69d0-43fb-8406-6a0f7cefc3b4} - \ No newline at end of file + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 
a98e78ba..a1372204 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -223,7 +223,7 @@ - + true @@ -248,4 +248,4 @@ - \ No newline at end of file + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index 61de4afe..4704fb2e 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -22,7 +22,7 @@ Source Files - + Source Files @@ -81,4 +81,4 @@ {852a14ae-6dde-4e95-8077-ca705e97e5af} - \ No newline at end of file + diff --git a/src/static.c b/src/static.c index 0519453e..ec9370eb 100644 --- a/src/static.c +++ b/src/static.c @@ -17,7 +17,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "random.c" #include "os.c" #include "arena.c" -#include "memory.c" +#include "region.c" #include "segment.c" #include "page.c" #include "heap.c" From 394b796ea0aec69b2f97ad51cce16ed432ca6e69 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 13:43:56 -0800 Subject: [PATCH 237/293] fix over-eager page reset in segment reclamation --- src/segment.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/segment.c b/src/segment.c index e536ae59..194aa793 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1019,26 +1019,18 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { ----------------------------------------------------------- */ // Possibly clear pages and check if free space is available -static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, mi_segments_tld_t* tld) +static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size) { mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); bool has_page = false; for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (page->segment_in_use) { - mi_assert_internal(!page->is_reset); - mi_assert_internal(page->is_committed); - 
mi_assert_internal(mi_page_not_in_queue(page, tld)); - mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); - mi_assert_internal(mi_page_heap(page) == NULL); - mi_assert_internal(page->next == NULL); + if (page->segment_in_use) { // ensure used count is up to date and collect potential concurrent frees _mi_page_free_collect(page, false); if (mi_page_all_free(page)) { - // if everything free already, clear the page directly - segment->abandoned--; - _mi_stat_decrease(&tld->stats->pages_abandoned, 1); - mi_segment_page_clear(segment, page, false, tld); // no (delayed) reset allowed (as the segment is still abandoned) + // if everything free already, page can be reused for some block size + // note: don't clear yet as we can only reset it once it is reclaimed has_page = true; } else if (page->xblock_size == block_size && page->used < page->reserved) { @@ -1047,6 +1039,7 @@ static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, m } } else { + // whole empty page has_page = true; } } @@ -1081,7 +1074,6 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, // set the heap again and allow delayed free again mi_page_set_heap(page, heap); _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) - mi_assert_internal(!mi_page_all_free(page)); // TODO: should we not collect again given that we just collected? 
_mi_page_free_collect(page, false); // ensure used count is up to date if (mi_page_all_free(page)) { @@ -1097,7 +1089,8 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, } } else if (page->is_committed && !page->is_reset) { // not in-use, and not reset yet - mi_pages_reset_add(segment, page, tld); + // note: no not reset as this includes pages that were not touched before + // mi_pages_reset_add(segment, page, tld); } } mi_assert_internal(segment->abandoned == 0); @@ -1146,7 +1139,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, int max_tries = 8; // limit the work to bound allocation times while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { segment->abandoned_visits++; - bool has_page = mi_segment_pages_collect(segment,block_size,tld); // try to free up pages (due to concurrent frees) + bool has_page = mi_segment_check_free(segment,block_size); // try to free up pages (due to concurrent frees) if (has_page && segment->page_kind == page_kind) { // found a free page of the right kind, or page of the right block_size with free space return mi_segment_reclaim(segment, heap, block_size, tld); From d4927adddc2c3b748934d3e45c4ddb673c6076ee Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 14:30:02 -0800 Subject: [PATCH 238/293] add extra assertion that all segments are free on thread termination --- src/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/init.c b/src/init.c index 366acbf5..f8411187 100644 --- a/src/init.c +++ b/src/init.c @@ -203,6 +203,7 @@ static bool _mi_heap_done(mi_heap_t* heap) { // free if not the main thread if (heap != &_mi_heap_main) { + mi_assert_internal(heap->tld->segments.count == 0); _mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main); } #if (MI_DEBUG > 0) From e628fc70676e8e2176fe66e8275480c14ad29ca3 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 26 Jan 2020 12:39:11 -0800 Subject: [PATCH 239/293] cleanup reclaim logic --- 
include/mimalloc-internal.h | 24 +++----- src/page.c | 40 +++++------- src/segment.c | 117 +++++++++++++++++++----------------- 3 files changed, 87 insertions(+), 94 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 902d2fdf..c7d7a1da 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -411,30 +411,24 @@ static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* return mi_tf_make(block, mi_tf_delayed(tf)); } -// are all blocks in a page freed? +// are all blocks in a page freed? +// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`. static inline bool mi_page_all_free(const mi_page_t* page) { mi_assert_internal(page != NULL); return (page->used == 0); } -// are there immediately available blocks +// are there any available blocks? +static inline bool mi_page_has_any_available(const mi_page_t* page) { + mi_assert_internal(page != NULL && page->reserved > 0); + return (page->used < page->reserved || (mi_page_thread_free(page) != NULL)); +} + +// are there immediately available blocks, i.e. blocks available on the free list. static inline bool mi_page_immediate_available(const mi_page_t* page) { mi_assert_internal(page != NULL); return (page->free != NULL); } -// are there free blocks in this page? -static inline bool mi_page_has_free(mi_page_t* page) { - mi_assert_internal(page != NULL); - bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_page_thread_free(page) != NULL)); - mi_assert_internal(hasfree || page->used == page->capacity); - return hasfree; -} - -// are all blocks in use? -static inline bool mi_page_all_used(mi_page_t* page) { - mi_assert_internal(page != NULL); - return !mi_page_has_free(page); -} // is more than 7/8th of a page in use? 
static inline bool mi_page_mostly_used(const mi_page_t* page) { diff --git a/src/page.c b/src/page.c index c5b86b08..e552a61e 100644 --- a/src/page.c +++ b/src/page.c @@ -234,6 +234,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(!page->is_reset); + // TODO: push on full queue immediately if it is full? mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); mi_page_queue_push(heap, pq, page); mi_assert_expensive(_mi_page_is_valid(page)); @@ -245,28 +246,16 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size mi_assert_internal(pq==NULL||block_size == pq->block_size); mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os); if (page == NULL) { - // this may be out-of-memory, or a page was reclaimed - if (pq!=NULL && (page = pq->first) != NULL) { - mi_assert_expensive(_mi_page_is_valid(page)); - if (!mi_page_immediate_available(page)) { - mi_page_extend_free(heap, page, heap->tld); - } - mi_assert_internal(mi_page_immediate_available(page)); - if (mi_page_immediate_available(page)) { - return page; // reclaimed page - } - } - return NULL; // out-of-memory - } - else { - // a fresh page was allocated, initialize it - mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_page_init(heap, page, block_size, heap->tld); - _mi_stat_increase(&heap->tld->stats.pages, 1); - if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL - mi_assert_expensive(_mi_page_is_valid(page)); - return page; + // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) + return NULL; } + // a fresh page was found, initialize it + mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); + mi_page_init(heap, page, block_size, 
heap->tld); + _mi_stat_increase(&heap->tld->stats.pages, 1); + if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL + mi_assert_expensive(_mi_page_is_valid(page)); + return page; } // Get a fresh page to use @@ -648,7 +637,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi -------------------------------------------------------------*/ // Find a page with free blocks of `page->block_size`. -static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) +static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try) { // search through the pages in "next fit" order size_t count = 0; @@ -686,13 +675,16 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p if (page == NULL) { _mi_heap_collect_retired(heap, false); // perhaps make a page available page = mi_page_fresh(heap, pq); + if (page == NULL && first_try) { + // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again + page = mi_page_queue_find_free_ex(heap, pq, false); + } } else { mi_assert(pq->first == page); page->retire_expire = 0; } mi_assert_internal(page == NULL || mi_page_immediate_available(page)); - return page; } @@ -716,7 +708,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { return page; // fast path } } - return mi_page_queue_find_free_ex(heap, pq); + return mi_page_queue_find_free_ex(heap, pq, true); } diff --git a/src/segment.c b/src/segment.c index 194aa793..c7a9662b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -669,6 +669,11 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // set protection mi_segment_protect(segment, true, tld->os); + // insert in free lists for small and medium pages + if (page_kind <= MI_PAGE_MEDIUM) { + mi_segment_insert_in_free_queue(segment, tld); + } + //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return 
segment; } @@ -1019,21 +1024,25 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { ----------------------------------------------------------- */ // Possibly clear pages and check if free space is available -static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size) +static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool* all_pages_free) { mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); bool has_page = false; + size_t pages_used = 0; + size_t pages_used_empty = 0; for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (page->segment_in_use) { + pages_used++; // ensure used count is up to date and collect potential concurrent frees _mi_page_free_collect(page, false); if (mi_page_all_free(page)) { // if everything free already, page can be reused for some block size - // note: don't clear yet as we can only reset it once it is reclaimed + // note: don't clear the page yet as we can only OS reset it once it is reclaimed + pages_used_empty++; has_page = true; } - else if (page->xblock_size == block_size && page->used < page->reserved) { + else if (page->xblock_size == block_size && mi_page_has_any_available(page)) { // a page has available free blocks of the right size has_page = true; } @@ -1043,15 +1052,19 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size) has_page = true; } } + mi_assert_internal(pages_used == segment->used && pages_used >= pages_used_empty); + if (all_pages_free != NULL) { + *all_pages_free = ((pages_used - pages_used_empty) == 0); + } return has_page; } -#define MI_RECLAIMED ((mi_segment_t*)1) -// Reclaim a segment -static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld) { +// Reclaim a segment; returns NULL if the segment was freed +// set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full. 
+static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) { mi_assert_internal(segment->abandoned_next == NULL); - bool right_page_reclaimed = false; + if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; } segment->thread_id = _mi_thread_id(); segment->abandoned_visits = 0; @@ -1071,10 +1084,10 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); - // set the heap again and allow delayed free again + // set the heap again and allow heap thread delayed free again. mi_page_set_heap(page, heap); _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) - // TODO: should we not collect again given that we just collected? + // TODO: should we not collect again given that we just collected in `check_free`? 
_mi_page_free_collect(page, false); // ensure used count is up to date if (mi_page_all_free(page)) { // if everything free already, clear the page directly @@ -1083,77 +1096,67 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, else { // otherwise reclaim it into the heap _mi_page_reclaim(heap, page); - if (block_size == page->xblock_size) { - right_page_reclaimed = true; + if (requested_block_size == page->xblock_size && mi_page_has_any_available(page)) { + if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; } } } } else if (page->is_committed && !page->is_reset) { // not in-use, and not reset yet - // note: no not reset as this includes pages that were not touched before + // note: do not reset as this includes pages that were not touched before // mi_pages_reset_add(segment, page, tld); } } mi_assert_internal(segment->abandoned == 0); - if (right_page_reclaimed) { - // add the segment's free pages to the free small segment queue + if (segment->used == 0) { + mi_assert_internal(right_page_reclaimed == NULL || !(*right_page_reclaimed)); + mi_segment_free(segment, false, tld); + return NULL; + } + else { if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { mi_segment_insert_in_free_queue(segment, tld); } - // and return reclaimed: at the page allocation the page is already in the queue now - return MI_RECLAIMED; - } - else { - // otherwise return the segment as it will contain some free pages - // (except for abandoned_reclaim_all which uses a block_size of zero) - mi_assert_internal(segment->used < segment->capacity || block_size == 0); return segment; } } -// Reclaim a segment without returning it -static void mi_segment_reclaim_force(mi_segment_t* segment, mi_heap_t* heap, mi_segments_tld_t* tld) { - mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, tld); - mi_assert_internal(res != MI_RECLAIMED); // due to block_size == 0 - if (res!=MI_RECLAIMED && res != NULL) { - 
mi_assert_internal(res == segment); - if (res->used == 0) { - mi_segment_free(segment, false, tld); - } - else if (res->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(res)) { - mi_segment_insert_in_free_queue(res, tld); - } - } -} void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { mi_segment_t* segment; while ((segment = mi_abandoned_pop()) != NULL) { - mi_segment_reclaim_force(segment, heap, tld); + mi_segment_reclaim(segment, heap, 0, NULL, tld); } } - -static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, mi_segments_tld_t* tld) +static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, bool* reclaimed, mi_segments_tld_t* tld) { + *reclaimed = false; mi_segment_t* segment; int max_tries = 8; // limit the work to bound allocation times while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { segment->abandoned_visits++; - bool has_page = mi_segment_check_free(segment,block_size); // try to free up pages (due to concurrent frees) - if (has_page && segment->page_kind == page_kind) { - // found a free page of the right kind, or page of the right block_size with free space - return mi_segment_reclaim(segment, heap, block_size, tld); + bool all_pages_free; + bool has_page = mi_segment_check_free(segment,block_size,&all_pages_free); // try to free up pages (due to concurrent frees) + if (all_pages_free) { + // free the segment (by forced reclaim) to make it available to other threads. + // note1: we prefer to free a segment as that might lead to reclaiming another + // segment that is still partially used. 
+ // note2: we could in principle optimize this by skipping reclaim and directly + // freeing but that would violate some invariants temporarily) + mi_segment_reclaim(segment, heap, 0, NULL, tld); } - else if (segment->used==0) { - // free the segment to make it available to other threads - mi_segment_os_free(segment, segment->segment_size, tld); + else if (has_page && segment->page_kind == page_kind) { + // found a free page of the right kind, or page of the right block_size with free space + // we return the result of reclaim (which is usually `segment`) as it might free + // the segment due to concurrent frees (in which case `NULL` is returned). + return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); } else if (segment->abandoned_visits >= 3) { - // always reclaim on 3rd visit to limit the list length - mi_segment_reclaim_force(segment, heap, tld); + // always reclaim on 3rd visit to limit the list length. + mi_segment_reclaim(segment, heap, 0, NULL, tld); } else { - // push on the visited list so it gets not looked at too quickly again + // otherwise, push on the visited list so it gets not looked at too quickly again mi_abandoned_visited_push(segment); } } @@ -1176,12 +1179,16 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s return segment; } // 2. 
try to reclaim an abandoned segment - segment = mi_segment_try_reclaim(heap, block_size, page_kind, tld); - if (segment == MI_RECLAIMED) { - return NULL; // pretend out-of-memory as the page will be in the page queue of the heap + bool reclaimed; + segment = mi_segment_try_reclaim(heap, block_size, page_kind, &reclaimed, tld); + if (reclaimed) { + // reclaimed the right page right into the heap + mi_assert_internal(segment != NULL && segment->page_kind == page_kind && page_kind <= MI_PAGE_LARGE); + return NULL; // pretend out-of-memory as the page will be in the page queue of the heap with available blocks } else if (segment != NULL) { - return segment; // reclaimed a segment with empty pages in it + // reclaimed a segment with empty pages (of `page_kind`) in it + return segment; } // 3. otherwise allocate a fresh segment return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld); @@ -1216,12 +1223,12 @@ static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_p // find an available segment the segment free queue mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld); if (mi_segment_queue_is_empty(free_queue)) { - // possibly allocate a fresh segment - mi_segment_t* segment = mi_segment_reclaim_or_alloc(heap, block_size, kind, page_shift, tld, os_tld); + // possibly allocate or reclaim a fresh segment + mi_segment_t* const segment = mi_segment_reclaim_or_alloc(heap, block_size, kind, page_shift, tld, os_tld); if (segment == NULL) return NULL; // return NULL if out-of-memory (or reclaimed) + mi_assert_internal(free_queue->first == segment); mi_assert_internal(segment->page_kind==kind); mi_assert_internal(segment->used < segment->capacity); - mi_segment_enqueue(free_queue, segment); } mi_assert_internal(free_queue->first != NULL); mi_page_t* const page = mi_segment_page_alloc_in(free_queue->first, tld); From 42586de10437308293f5967cc4c6527c0d67a76c Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 27 Jan 2020 23:13:57 
-0800 Subject: [PATCH 240/293] fix is_zero setting in regions --- src/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index 96047b79..55122887 100644 --- a/src/memory.c +++ b/src/memory.c @@ -284,7 +284,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_assert_internal(!(info.x.is_large && !*is_large)); mi_assert_internal(start != NULL); - *is_zero = mi_bitmap_unclaim(®ion->dirty, 1, blocks, bit_idx); + *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); *is_large = info.x.is_large; *memid = mi_memid_create(region, bit_idx); void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); From 9c166d88f0ca6ce5322856e58ac730972ca5404f Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 27 Jan 2020 23:15:24 -0800 Subject: [PATCH 241/293] increase retire page size --- src/page.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/page.c b/src/page.c index 149926e8..28e5dfdb 100644 --- a/src/page.c +++ b/src/page.c @@ -386,6 +386,8 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_segment_page_free(page, force, segments_tld); } +#define MI_MAX_RETIRE_SIZE (4*MI_SMALL_SIZE_MAX) + // Retire a page with no more used blocks // Important to not retire too quickly though as new // allocations might coming. @@ -406,7 +408,7 @@ void _mi_page_retire(mi_page_t* page) { // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); - if (mi_likely(page->xblock_size <= MI_SMALL_SIZE_MAX && !mi_page_is_in_full(page))) { + if (mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page))) { if (pq->last==page && pq->first==page) { // the only page in the queue? 
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); page->retire_expire = 16; @@ -421,7 +423,7 @@ void _mi_page_retire(mi_page_t* page) { // free retired pages: we don't need to look at the entire queues // since we only retire pages that are the last one in a queue. void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { - for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) { + for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_MAX_RETIRE_SIZE; pq++) { mi_page_t* page = pq->first; if (page != NULL && page->retire_expire != 0) { if (mi_page_all_free(page)) { From 5d212d688f82a3b17f00faa11967e9459dc78715 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 29 Jan 2020 17:10:57 -0800 Subject: [PATCH 242/293] add MI_PADDING build option to add padding to each block to detect heap block overflows --- include/mimalloc-types.h | 18 +++++++++++++---- src/alloc.c | 40 +++++++++++++++++++++++++++++++++---- test/main-override-static.c | 7 +++++++ 3 files changed, 57 insertions(+), 8 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 48d86a25..39debae1 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -12,6 +12,10 @@ terms of the MIT license. A copy of the license can be found in the file #include // uintptr_t, uint16_t, etc #include // _Atomic +// Minimal alignment necessary. On most platforms 16 bytes are needed +// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE` +#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) + // ------------------------------------------------------ // Variants // ------------------------------------------------------ @@ -50,6 +54,16 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_ENCODE_FREELIST 1 #endif +// Reserve extra padding at the end of each block; must be a multiple of `sizeof(intptr_t)`! +// If free lists are encoded, the padding is checked if it was modified on free. 
+#if (!defined(MI_PADDING)) +#if (MI_SECURE>=3 || MI_DEBUG>=1) +#define MI_PADDING MI_MAX_ALIGN_SIZE +#else +#define MI_PADDING 0 +#endif +#endif + // ------------------------------------------------------ // Platform specific values // ------------------------------------------------------ @@ -113,10 +127,6 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) #define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) -// Minimal alignment necessary. On most platforms 16 bytes are needed -// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE` -#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) - // Maximum number of size classes. (spaced exponentially in 12.5% increments) #define MI_BIN_HUGE (73U) diff --git a/src/alloc.c b/src/alloc.c index 3f577f2f..e4324d73 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -42,6 +42,11 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz size_t bin = _mi_bin(size); mi_heap_stat_increase(heap,normal[bin], 1); } +#endif +#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) + mi_assert_internal((MI_PADDING % sizeof(mi_block_t*)) == 0); + mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING); + mi_block_set_nextx(page, padding, block, page->key[0], page->key[1]); #endif return block; } @@ -54,6 +59,9 @@ extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size } extern inline mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept { +#if (MI_PADDING>0) + size += MI_PADDING; +#endif return mi_heap_malloc_small(mi_get_default_heap(), size); } @@ -69,6 +77,9 @@ mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept { extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { mi_assert(heap!=NULL); 
mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local +#if (MI_PADDING>0) + size += MI_PADDING; +#endif void* p; if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { p = mi_heap_malloc_small(heap, size); @@ -99,11 +110,11 @@ void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { if (page->is_zero) { // already zero initialized memory? ((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page))); + mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page) - MI_PADDING)); } else { // otherwise memset - memset(p, 0, mi_page_block_size(page)); + memset(p, 0, mi_page_block_size(page) - MI_PADDING); } } @@ -171,6 +182,20 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block } #endif +#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING); + mi_block_t* const decoded = mi_block_nextx(page, padding, page->key[0], page->key[1]); + if (decoded != block) { + _mi_error_message(EINVAL, "buffer overflow in heap block %p: write after %zu bytes\n", block, page->xblock_size); + } +} +#else +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + UNUSED(page); + UNUSED(block); +} +#endif // ------------------------------------------------------ // Free @@ -214,6 +239,8 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc return; } + mi_check_padding(page, block); + mi_thread_free_t tfree; mi_thread_free_t tfreex; bool use_delayed; @@ -258,13 +285,14 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) { #if (MI_DEBUG) - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + memset(block, 
MI_DEBUG_FREED, mi_page_block_size(page) - MI_PADDING); #endif // and push it on the free list if (mi_likely(local)) { // owning thread can free a block directly if (mi_unlikely(mi_check_is_double_free(page, block))) return; + mi_check_padding(page, block); mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; @@ -341,6 +369,7 @@ void mi_free(void* p) mi_attr_noexcept // local, and not full or aligned mi_block_t* const block = (mi_block_t*)p; if (mi_unlikely(mi_check_is_double_free(page,block))) return; + mi_check_padding(page, block); mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; @@ -381,8 +410,11 @@ bool _mi_free_delayed_block(mi_block_t* block) { size_t mi_usable_size(const void* p) mi_attr_noexcept { if (p==NULL) return 0; const mi_segment_t* segment = _mi_ptr_segment(p); - const mi_page_t* page = _mi_segment_page_of(segment,p); + const mi_page_t* page = _mi_segment_page_of(segment, p); size_t size = mi_page_block_size(page); +#if defined(MI_PADDING) + size -= MI_PADDING; +#endif if (mi_unlikely(mi_page_has_aligned(page))) { ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); diff --git a/test/main-override-static.c b/test/main-override-static.c index 54a5ea66..a1c3edee 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -10,6 +10,7 @@ static void double_free1(); static void double_free2(); static void corrupt_free(); +static void block_overflow1(); int main() { mi_version(); @@ -18,6 +19,7 @@ int main() { // double_free1(); // double_free2(); // corrupt_free(); + // block_overflow1(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -41,6 +43,11 @@ int main() { return 0; } +static void block_overflow1() { + void* p = mi_malloc(16); + memset(p, 0, 17); + free(p); +} // The double free samples come ArcHeap [1] by Insu Yun (issue #161) // [1]: 
https://arxiv.org/pdf/1903.00503.pdf From 7ff3ec2bf74b9014279103a55b632df182dacc7c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 29 Jan 2020 17:25:40 -0800 Subject: [PATCH 243/293] use EFAULT for buffer overflow and call abort in debug mode (as well as secure mode) --- src/alloc.c | 2 +- src/options.c | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/alloc.c b/src/alloc.c index e4324d73..6852d652 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -187,7 +187,7 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING); mi_block_t* const decoded = mi_block_nextx(page, padding, page->key[0], page->key[1]); if (decoded != block) { - _mi_error_message(EINVAL, "buffer overflow in heap block %p: write after %zu bytes\n", block, page->xblock_size); + _mi_error_message(EFAULT, "buffer overflow in heap block %p: write after %zu bytes\n", block, page->xblock_size); } } #else diff --git a/src/options.c b/src/options.c index af051aa2..7559a4b5 100644 --- a/src/options.c +++ b/src/options.c @@ -319,6 +319,14 @@ static volatile _Atomic(void*) mi_error_arg; // = NULL static void mi_error_default(int err) { UNUSED(err); +#if (MI_DEBUG>0) + if (err==EFAULT) { + #ifdef _MSC_VER + __debugbreak(); + #endif + abort(); + } +#endif #if (MI_SECURE>0) if (err==EFAULT) { // abort on serious errors in secure mode (corrupted meta-data) abort(); From 03b363a1c289ad4461c219050466a9f7de0b8432 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 29 Jan 2020 22:46:44 -0800 Subject: [PATCH 244/293] first working tls on macOS using interpose; still slow --- CMakeLists.txt | 2 +- include/mimalloc-internal.h | 38 +++++++++++------- src/alloc-override.c | 7 +++- src/alloc.c | 2 +- src/init.c | 62 ++++++++++++++++------------ src/options.c | 32 ++++++++++----- src/random.c | 34 ++++++++-------- src/segment.c | 80 ++++++++++++++++++------------------- test/test-stress.c | 
18 ++++----- 9 files changed, 155 insertions(+), 120 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b60e64a4..2da7974b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -247,7 +247,7 @@ if (MI_BUILD_TESTS MATCHES "ON") target_compile_definitions(mimalloc-test-stress PRIVATE ${mi_defines}) target_compile_options(mimalloc-test-stress PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-stress PRIVATE include) - target_link_libraries(mimalloc-test-stress PRIVATE mimalloc-static ${mi_libraries}) + target_link_libraries(mimalloc-test-stress PRIVATE mimalloc ${mi_libraries}) enable_testing() add_test(test_api, mimalloc-test-api) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index c7d7a1da..f4b578f6 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -33,7 +33,7 @@ terms of the MIT license. A copy of the license can be found in the file #else #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) -#define mi_decl_cache_align +#define mi_decl_cache_align #endif @@ -51,6 +51,7 @@ void _mi_random_init(mi_random_ctx_t* ctx); void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); uintptr_t _mi_random_next(mi_random_ctx_t* ctx); uintptr_t _mi_heap_random_next(mi_heap_t* heap); +uintptr_t _os_random_weak(uintptr_t extra_seed); static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c @@ -233,7 +234,7 @@ static inline size_t _mi_wsize_from_size(size_t size) { // Overflow detecting multiply -static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { #if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 #include // UINT_MAX, ULONG_MAX #if (SIZE_MAX == UINT_MAX) @@ -274,18 +275,24 @@ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value o extern mi_heap_t _mi_heap_main; // statically allocated main backing heap extern bool 
_mi_process_is_initialized; -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from -static inline mi_heap_t* mi_get_default_heap(void) { #ifdef MI_TLS_RECURSE_GUARD +extern mi_heap_t* _mi_get_default_heap_tls_safe(void); +static inline mi_heap_t* mi_get_default_heap(void) { // on some BSD platforms, like macOS, the dynamic loader calls `malloc` // to initialize thread local data. To avoid recursion, we need to avoid // accessing the thread local `_mi_default_heap` until our module is loaded // and use the statically allocated main heap until that time. // TODO: patch ourselves dynamically to avoid this check every time? - if (!_mi_process_is_initialized) return &_mi_heap_main; -#endif + return _mi_get_default_heap_tls_safe(); +#else + +extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from + +static inline mi_heap_t* mi_get_default_heap(void) { return _mi_heap_default; + +#endif } static inline bool mi_heap_is_default(const mi_heap_t* heap) { @@ -302,6 +309,7 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) { } static inline uintptr_t _mi_ptr_cookie(const void* p) { + mi_assert_internal(_mi_heap_main.cookie != 0); return ((uintptr_t)p ^ _mi_heap_main.cookie); } @@ -345,7 +353,7 @@ static inline uintptr_t _mi_segment_page_idx_of(const mi_segment_t* segment, con // Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { - uintptr_t idx = _mi_segment_page_idx_of(segment, p); + uintptr_t idx = _mi_segment_page_idx_of(segment, p); return &((mi_segment_t*)segment)->pages[idx]; } @@ -411,14 +419,14 @@ static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* return mi_tf_make(block, mi_tf_delayed(tf)); } -// are all blocks in a page freed? +// are all blocks in a page freed? // note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`. 
static inline bool mi_page_all_free(const mi_page_t* page) { mi_assert_internal(page != NULL); return (page->used == 0); } -// are there any available blocks? +// are there any available blocks? static inline bool mi_page_has_any_available(const mi_page_t* page) { mi_assert_internal(page != NULL && page->reserved > 0); return (page->used < page->reserved || (mi_page_thread_free(page) != NULL)); @@ -466,11 +474,11 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { /* ------------------------------------------------------------------- Encoding/Decoding the free list next pointers -This is to protect against buffer overflow exploits where the -free list is mutated. Many hardened allocators xor the next pointer `p` +This is to protect against buffer overflow exploits where the +free list is mutated. Many hardened allocators xor the next pointer `p` with a secret key `k1`, as `p^k1`. This prevents overwriting with known -values but might be still too weak: if the attacker can guess -the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). +values but might be still too weak: if the attacker can guess +the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). Moreover, if multiple blocks can be read as well, the attacker can xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot about the pointers (and subsequently `k1`). @@ -478,9 +486,9 @@ about the pointers (and subsequently `k1`). Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<< struct mi_interpose_s { @@ -54,7 +58,7 @@ terms of the MIT license. 
A copy of the license can be found in the file MI_INTERPOSE_MI(malloc), MI_INTERPOSE_MI(calloc), MI_INTERPOSE_MI(realloc), - MI_INTERPOSE_MI(free), + MI_INTERPOSEX(free,mi_free_tls_safe), MI_INTERPOSE_MI(strdup), MI_INTERPOSE_MI(strndup) }; @@ -194,4 +198,3 @@ int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_me #endif #endif // MI_MALLOC_OVERRIDE && !_WIN32 - diff --git a/src/alloc.c b/src/alloc.c index 3f577f2f..d60c33bf 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -21,7 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. -extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { +extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* block = page->free; if (mi_unlikely(block == NULL)) { diff --git a/src/init.c b/src/init.c index f8411187..922b7438 100644 --- a/src/init.c +++ b/src/init.c @@ -104,9 +104,9 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; static mi_tld_t tld_main = { 0, false, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, - 0, 0, 0, 0, 0, 0, NULL, - tld_main_stats, tld_main_os + { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, + 0, 0, 0, 0, 0, 0, NULL, + tld_main_stats, tld_main_os }, // segments { 0, tld_main_stats }, // os { MI_STATS_NULL } // stats @@ -124,9 +124,9 @@ mi_heap_t _mi_heap_main = { MI_PAGE_QUEUES_EMPTY, ATOMIC_VAR_INIT(NULL), 0, // thread id - MI_INIT_COOKIE, // initial cookie - { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) 
- { {0}, {0}, 0 }, // random + 0, // initial cookie + { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) + { {0x846ca68b}, {0}, 0 }, // random 0, // page count false // can reclaim }; @@ -148,14 +148,15 @@ typedef struct mi_thread_data_s { // Initialize the thread local default heap, called from `mi_thread_init` static bool _mi_heap_init(void) { - if (mi_heap_is_initialized(_mi_heap_default)) return true; + if (mi_heap_is_initialized(mi_get_default_heap())) return true; if (_mi_is_main_thread()) { + mi_assert_internal(_mi_heap_main.thread_id != 0); // the main heap is statically allocated _mi_heap_set_default_direct(&_mi_heap_main); - mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); + //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); } else { - // use `_mi_os_alloc` to allocate directly from the OS + // use `_mi_os_alloc` to allocate directly from the OS mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t),&_mi_stats_main); // Todo: more efficient allocation? 
if (td == NULL) { _mi_error_message(ENOMEM, "failed to allocate thread local heap memory\n"); @@ -170,7 +171,7 @@ static bool _mi_heap_init(void) { heap->cookie = _mi_heap_random_next(heap) | 1; heap->key[0] = _mi_heap_random_next(heap); heap->key[1] = _mi_heap_random_next(heap); - heap->tld = tld; + heap->tld = tld; tld->heap_backing = heap; tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; @@ -265,8 +266,9 @@ static void _mi_thread_done(mi_heap_t* default_heap); #endif // Set up handlers so `mi_thread_done` is called automatically +static bool tls_initialized = false; // fine if it races + static void mi_process_setup_auto_thread_done(void) { - static bool tls_initialized = false; // fine if it races if (tls_initialized) return; tls_initialized = true; #if defined(_WIN32) && defined(MI_SHARED_LIB) @@ -317,7 +319,9 @@ static void _mi_thread_done(mi_heap_t* heap) { void _mi_heap_set_default_direct(mi_heap_t* heap) { mi_assert_internal(heap != NULL); + #ifndef MI_TLS_RECURSE_GUARD _mi_heap_default = heap; + #endif // ensure the default heap is passed to `_mi_thread_done` // setting to a non-NULL value also ensures `mi_thread_done` is called. @@ -330,7 +334,11 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { #endif } - +mi_heap_t* _mi_get_default_heap_tls_safe(void) { + if (mi_unlikely(mi_pthread_key==0)) return (mi_heap_t*)&_mi_heap_empty; + mi_heap_t* heap = pthread_getspecific(mi_pthread_key); + return (mi_likely(heap!=NULL) ? heap : (mi_heap_t*)&_mi_heap_empty); +} // -------------------------------------------------------- // Run functions on process init/done, and thread init/done @@ -339,6 +347,7 @@ static void mi_process_done(void); static bool os_preloading = true; // true until this module is initialized static bool mi_redirected = false; // true if malloc redirects to mi_malloc +bool _mi_tls_initialized = false; // Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. 
bool _mi_preloading() { @@ -383,7 +392,10 @@ static void mi_allocator_done() { // Called once by the process loader static void mi_process_load(void) { + volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; + UNUSED(dummy); os_preloading = false; + _mi_tls_initialized = true; atexit(&mi_process_done); _mi_options_init(); mi_process_init(); @@ -398,26 +410,26 @@ static void mi_process_load(void) { } } +void _mi_heap_main_init(void) { + if (_mi_heap_main.cookie == 0) { + _mi_heap_main.thread_id = _mi_thread_id(); + _mi_heap_main.cookie = _os_random_weak((uintptr_t)&_mi_heap_main_init); + _mi_random_init(&_mi_heap_main.random); + _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main); + } +} + // Initialize the process; called by thread_init or the process loader void mi_process_init(void) mi_attr_noexcept { // ensure we are called once if (_mi_process_is_initialized) return; - // access _mi_heap_default before setting _mi_process_is_initialized to ensure - // that the TLS slot is allocated without getting into recursion on macOS - // when using dynamic linking with interpose. - mi_get_default_heap(); _mi_process_is_initialized = true; - - _mi_heap_main.thread_id = _mi_thread_id(); - _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); - _mi_random_init(&_mi_heap_main.random); - #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. 
- _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); - _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); - _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main); - #endif mi_process_setup_auto_thread_done(); + + _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); _mi_os_init(); + _mi_heap_main_init(); #if (MI_DEBUG) _mi_verbose_message("debug level : %d\n", MI_DEBUG); #endif diff --git a/src/options.c b/src/options.c index af051aa2..c0bf9680 100644 --- a/src/options.c +++ b/src/options.c @@ -53,7 +53,7 @@ static mi_option_desc_t options[_mi_option_last] = // stable options { MI_DEBUG, UNINIT, MI_OPTION(show_errors) }, { 0, UNINIT, MI_OPTION(show_stats) }, - { 0, UNINIT, MI_OPTION(verbose) }, + { 1, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand @@ -239,16 +239,30 @@ static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT // inside the C runtime causes another message. static mi_decl_thread bool recurse = false; +static bool mi_recurse_enter(void) { + #ifdef MI_TLS_RECURSE_GUARD + if (_mi_preloading()) return true; + #endif + if (recurse) return false; + recurse = true; + return true; +} + +static void mi_recurse_exit(void) { + #ifdef MI_TLS_RECURSE_GUARD + if (_mi_preloading()) return; + #endif + recurse = false; +} + void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) { - if (recurse) return; + if (!mi_recurse_enter()) return; if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr? out = mi_out_get_default(&arg); } - recurse = true; if (prefix != NULL) out(prefix,arg); out(message,arg); - recurse = false; - return; + mi_recurse_exit(); } // Define our own limited `fprintf` that avoids memory allocation. 
@@ -256,14 +270,12 @@ void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* me static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) { char buf[512]; if (fmt==NULL) return; - if (recurse) return; - recurse = true; + if (!mi_recurse_enter()) return; vsnprintf(buf,sizeof(buf)-1,fmt,args); - recurse = false; + mi_recurse_exit(); _mi_fputs(out,arg,prefix,buf); } - void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { va_list args; va_start(args,fmt); @@ -290,7 +302,7 @@ void _mi_verbose_message(const char* fmt, ...) { static void mi_show_error_message(const char* fmt, va_list args) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; if (mi_atomic_increment(&error_count) > mi_max_error_count) return; - mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); } void _mi_warning_message(const char* fmt, ...) { diff --git a/src/random.c b/src/random.c index 6fef2434..b3dbf4f8 100644 --- a/src/random.c +++ b/src/random.c @@ -11,7 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- We use our own PRNG to keep predictable performance of random number generation -and to avoid implementations that use a lock. We only use the OS provided +and to avoid implementations that use a lock. We only use the OS provided random source to initialize the initial seeds. Since we do not need ultimate performance but we do rely on the security (for secret cookies in secure mode) we use a cryptographically secure generator (chacha20). @@ -21,11 +21,11 @@ we use a cryptographically secure generator (chacha20). 
/* ---------------------------------------------------------------------------- -Chacha20 implementation as the original algorithm with a 64-bit nonce +Chacha20 implementation as the original algorithm with a 64-bit nonce and counter: https://en.wikipedia.org/wiki/Salsa20 The input matrix has sixteen 32-bit values: Position 0 to 3: constant key -Position 4 to 11: the key +Position 4 to 11: the key Position 12 to 13: the counter. Position 14 to 15: the nonce. @@ -44,8 +44,8 @@ static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); } -static void chacha_block(mi_random_ctx_t* ctx) -{ +static void chacha_block(mi_random_ctx_t* ctx) +{ // scramble into `x` uint32_t x[16]; for (size_t i = 0; i < 16; i++) { @@ -72,8 +72,8 @@ static void chacha_block(mi_random_ctx_t* ctx) ctx->input[12] += 1; if (ctx->input[12] == 0) { ctx->input[13] += 1; - if (ctx->input[13] == 0) { // and keep increasing into the nonce - ctx->input[14] += 1; + if (ctx->input[13] == 0) { // and keep increasing into the nonce + ctx->input[14] += 1; } } } @@ -83,7 +83,7 @@ static uint32_t chacha_next32(mi_random_ctx_t* ctx) { chacha_block(ctx); ctx->output_available = 16; // (assign again to suppress static analysis warning) } - const uint32_t x = ctx->output[16 - ctx->output_available]; + const uint32_t x = ctx->output[16 - ctx->output_available]; ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out ctx->output_available--; return x; @@ -94,9 +94,9 @@ static inline uint32_t read32(const uint8_t* p, size_t idx32) { return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24); } -static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce) +static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce) { - // since we only use chacha for randomness (and not encryption) we + // since we only use chacha for randomness (and not 
encryption) we // do not _need_ to read 32-bit values as little endian but we do anyways // just for being compatible :-) memset(ctx, 0, sizeof(*ctx)); @@ -110,7 +110,7 @@ static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t no ctx->input[12] = 0; ctx->input[13] = 0; ctx->input[14] = (uint32_t)nonce; - ctx->input[15] = (uint32_t)(nonce >> 32); + ctx->input[15] = (uint32_t)(nonce >> 32); } static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) { @@ -184,7 +184,7 @@ static bool os_random_buf(void* buf, size_t buf_len) { arc4random_buf(buf, buf_len); return true; } -#elif defined(__linux__) +#elif defined(__linux__) #include #include #include @@ -241,8 +241,8 @@ static bool os_random_buf(void* buf, size_t buf_len) { #include #endif -static uintptr_t os_random_weak(uintptr_t extra_seed) { - uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random +uintptr_t _os_random_weak(uintptr_t extra_seed) { + uintptr_t x = (uintptr_t)&_os_random_weak ^ extra_seed; // ASLR makes the address random #if defined(_WIN32) LARGE_INTEGER pcount; QueryPerformanceCounter(&pcount); @@ -267,10 +267,10 @@ static uintptr_t os_random_weak(uintptr_t extra_seed) { void _mi_random_init(mi_random_ctx_t* ctx) { uint8_t key[32]; if (!os_random_buf(key, sizeof(key))) { - // if we fail to get random data from the OS, we fall back to a + // if we fail to get random data from the OS, we fall back to a // weak random source based on the current time _mi_warning_message("unable to use secure randomness\n"); - uintptr_t x = os_random_weak(0); + uintptr_t x = _os_random_weak(0); for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. 
x = _mi_random_shuffle(x); ((uint32_t*)key)[i] = (uint32_t)x; @@ -280,7 +280,7 @@ void _mi_random_init(mi_random_ctx_t* ctx) { } /* -------------------------------------------------------- -test vectors from +test vectors from ----------------------------------------------------------- */ /* static bool array_equals(uint32_t* x, uint32_t* y, size_t n) { diff --git a/src/segment.c b/src/segment.c index c7a9662b..0e70c3bf 100644 --- a/src/segment.c +++ b/src/segment.c @@ -17,9 +17,9 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_ /* -------------------------------------------------------------------------------- Segment allocation - We allocate pages inside bigger "segments" (4mb on 64-bit). This is to avoid - splitting VMA's on Linux and reduce fragmentation on other OS's. - Each thread owns its own segments. + We allocate pages inside bigger "segments" (4mb on 64-bit). This is to avoid + splitting VMA's on Linux and reduce fragmentation on other OS's. + Each thread owns its own segments. 
Currently we have: - small pages (64kb), 64 in one segment @@ -154,14 +154,14 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* for (size_t i = 0; i < segment->capacity; i++) { const mi_page_t* const page = &segment->pages[i]; if (!page->segment_in_use) { - nfree++; + nfree++; } if (page->segment_in_use || page->is_reset) { mi_assert_expensive(!mi_pages_reset_contains(page, tld)); } } mi_assert_internal(nfree + segment->used == segment->capacity); - mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0 + // mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0 mi_assert_internal(segment->page_kind == MI_PAGE_HUGE || (mi_segment_page_size(segment) * segment->capacity == segment->segment_size)); return true; @@ -286,7 +286,7 @@ static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segmen mi_assert_expensive(!mi_pages_reset_contains(page, tld)); mi_assert_internal(_mi_page_segment(page)==segment); if (!mi_option_is_enabled(mi_option_page_reset)) return; - if (segment->mem_is_fixed || page->segment_in_use || !page->is_committed || page->is_reset) return; + if (segment->mem_is_fixed || page->segment_in_use || !page->is_committed || page->is_reset) return; if (mi_option_get(mi_option_reset_delay) == 0) { // reset immediately? 
@@ -295,7 +295,7 @@ static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segmen else { // otherwise push on the delayed page reset queue mi_page_queue_t* pq = &tld->pages_reset; - // push on top + // push on top mi_page_reset_set_expire(page); page->next = pq->first; page->prev = NULL; @@ -316,7 +316,7 @@ static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) { mi_page_queue_t* pq = &tld->pages_reset; mi_assert_internal(pq!=NULL); mi_assert_internal(!page->segment_in_use); - mi_assert_internal(mi_pages_reset_contains(page, tld)); + mi_assert_internal(mi_pages_reset_contains(page, tld)); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == pq->last) pq->last = page->prev; @@ -332,19 +332,19 @@ static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool for if (!page->segment_in_use && page->is_committed && !page->is_reset) { mi_pages_reset_remove(page, tld); if (force_reset) { - mi_page_reset(segment, page, 0, tld); + mi_page_reset(segment, page, 0, tld); } } else { mi_assert_internal(mi_page_not_in_queue(page,tld)); - } + } } } static void mi_reset_delayed(mi_segments_tld_t* tld) { if (!mi_option_is_enabled(mi_option_page_reset)) return; mi_msecs_t now = _mi_clock_now(); - mi_page_queue_t* pq = &tld->pages_reset; + mi_page_queue_t* pq = &tld->pages_reset; // from oldest up to the first that has not expired yet mi_page_t* page = pq->last; while (page != NULL && mi_page_reset_is_expired(page,now)) { @@ -358,7 +358,7 @@ static void mi_reset_delayed(mi_segments_tld_t* tld) { pq->last = page; if (page != NULL){ page->next = NULL; - } + } else { pq->first = NULL; } @@ -540,7 +540,7 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache == NULL); -#if MI_DEBUG>=2 +#if MI_DEBUG>=2 if (!_mi_is_main_thread()) { mi_assert_internal(tld->pages_reset.first == NULL); 
mi_assert_internal(tld->pages_reset.last == NULL); @@ -684,7 +684,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { UNUSED(force); - mi_assert(segment != NULL); + mi_assert(segment != NULL); // note: don't reset pages even on abandon as the whole segment is freed? (and ready for reuse) bool force_reset = (force && mi_option_is_enabled(mi_option_abandoned_page_reset)); mi_pages_reset_remove_all_in_segment(segment, force_reset, tld); @@ -716,7 +716,7 @@ static bool mi_segment_has_free(const mi_segment_t* segment) { static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(_mi_page_segment(page) == segment); - mi_assert_internal(!page->segment_in_use); + mi_assert_internal(!page->segment_in_use); // set in-use before doing unreset to prevent delayed reset mi_pages_reset_remove(page, tld); page->segment_in_use = true; @@ -756,7 +756,7 @@ static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); // clear page data; can be called on abandoned segments -static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool allow_reset, mi_segments_tld_t* tld) +static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool allow_reset, mi_segments_tld_t* tld) { mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); @@ -787,7 +787,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a segment->used--; // add to the free page list for reuse/reset - if (allow_reset) { + if (allow_reset) { mi_pages_reset_add(segment, page, tld); } } @@ -841,12 +841,12 @@ Note: the current implementation is one possible design; another way might be to keep track of abandoned segments in the regions. 
This would have the advantage of keeping all concurrent code in one place and not needing to deal -with ABA issues. The drawback is that it is unclear how to -scan abandoned segments efficiently in that case as they +with ABA issues. The drawback is that it is unclear how to +scan abandoned segments efficiently in that case as they would be spread among all other segments in the regions. ----------------------------------------------------------- */ -// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers +// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers // to put in a tag that increments on update to avoid the A-B-A problem. #define MI_TAGGED_MASK MI_SEGMENT_MASK typedef uintptr_t mi_tagged_segment_t; @@ -862,7 +862,7 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se } // This is a list of visited abandoned pages that were full at the time. -// this list migrates to `abandoned` when that becomes NULL. The use of +// this list migrates to `abandoned` when that becomes NULL. The use of // this list reduces contention and the rate at which segments are visited. static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL @@ -888,7 +888,7 @@ static void mi_abandoned_visited_push(mi_segment_t* segment) { } // Move the visited list to the abandoned list. -static bool mi_abandoned_visited_revisit(void) +static bool mi_abandoned_visited_revisit(void) { // quick check if the visited list is empty if (mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned_visited)==NULL) return false; @@ -954,12 +954,12 @@ static mi_segment_t* mi_abandoned_pop(void) { segment = mi_tagged_segment_ptr(ts); if (mi_likely(segment == NULL)) { if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL - return NULL; + return NULL; } } // Do a pop. 
We use a reader count to prevent - // a segment to be decommitted while a read is still pending, + // a segment to be decommitted while a read is still pending, // and a tagged pointer to prevent A-B-A link corruption. // (this is called from `memory.c:_mi_mem_free` for example) mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted @@ -1024,7 +1024,7 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { ----------------------------------------------------------- */ // Possibly clear pages and check if free space is available -static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool* all_pages_free) +static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool* all_pages_free) { mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); bool has_page = false; @@ -1032,17 +1032,17 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool size_t pages_used_empty = 0; for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (page->segment_in_use) { + if (page->segment_in_use) { pages_used++; // ensure used count is up to date and collect potential concurrent frees - _mi_page_free_collect(page, false); + _mi_page_free_collect(page, false); if (mi_page_all_free(page)) { // if everything free already, page can be reused for some block size // note: don't clear the page yet as we can only OS reset it once it is reclaimed pages_used_empty++; has_page = true; } - else if (page->xblock_size == block_size && mi_page_has_any_available(page)) { + else if (page->xblock_size == block_size && mi_page_has_any_available(page)) { // a page has available free blocks of the right size has_page = true; } @@ -1051,7 +1051,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool // whole empty page has_page = true; } - } + } mi_assert_internal(pages_used == segment->used && pages_used >= pages_used_empty); if 
(all_pages_free != NULL) { *all_pages_free = ((pages_used - pages_used_empty) == 0); @@ -1100,7 +1100,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; } } } - } + } else if (page->is_committed && !page->is_reset) { // not in-use, and not reset yet // note: do not reset as this includes pages that were not touched before // mi_pages_reset_add(segment, page, tld); @@ -1141,17 +1141,17 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, // free the segment (by forced reclaim) to make it available to other threads. // note1: we prefer to free a segment as that might lead to reclaiming another // segment that is still partially used. - // note2: we could in principle optimize this by skipping reclaim and directly + // note2: we could in principle optimize this by skipping reclaim and directly // freeing but that would violate some invariants temporarily) mi_segment_reclaim(segment, heap, 0, NULL, tld); } else if (has_page && segment->page_kind == page_kind) { - // found a free page of the right kind, or page of the right block_size with free space + // found a free page of the right kind, or page of the right block_size with free space // we return the result of reclaim (which is usually `segment`) as it might free // the segment due to concurrent frees (in which case `NULL` is returned). return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); } - else if (segment->abandoned_visits >= 3) { + else if (segment->abandoned_visits >= 3) { // always reclaim on 3rd visit to limit the list length. 
mi_segment_reclaim(segment, heap, 0, NULL, tld); } @@ -1165,12 +1165,12 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, /* ----------------------------------------------------------- - Reclaim or allocate + Reclaim or allocate ----------------------------------------------------------- */ -static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_assert_internal(page_kind <= MI_PAGE_LARGE); + mi_assert_internal(page_kind <= MI_PAGE_LARGE); mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); // 1. try to get a segment from our cache mi_segment_t* segment = mi_segment_cache_pop(MI_SEGMENT_SIZE, tld); @@ -1191,7 +1191,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s return segment; } // 3. otherwise allocate a fresh segment - return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld); + return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld); } @@ -1216,11 +1216,11 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* // Allocate a page inside a segment. 
Requires that the page has free pages static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); - return mi_segment_find_free(segment, tld); + return mi_segment_find_free(segment, tld); } static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - // find an available segment the segment free queue + // find an available segment the segment free queue mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld); if (mi_segment_queue_is_empty(free_queue)) { // possibly allocate or reclaim a fresh segment @@ -1275,7 +1275,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld } /* ----------------------------------------------------------- - Page allocation + Page allocation ----------------------------------------------------------- */ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { diff --git a/test/test-stress.c b/test/test-stress.c index 1b559a59..8958933e 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -20,7 +20,7 @@ terms of the MIT license. #include #include #include -#include +// #include // > mimalloc-test-stress [THREADS] [SCALE] [ITER] // @@ -38,7 +38,7 @@ static bool allow_large_objects = true; // allow very large objects? static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? 
-#ifdef USE_STD_MALLOC +#ifndef USE_STD_MALLOC #define custom_calloc(n,s) calloc(n,s) #define custom_realloc(p,s) realloc(p,s) #define custom_free(p) free(p) @@ -188,7 +188,7 @@ static void test_stress(void) { free_items(p); } } - mi_collect(false); + // mi_collect(false); #ifndef NDEBUG if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif @@ -206,7 +206,7 @@ static void leak(intptr_t tid) { } } -static void test_leak(void) { +static void test_leak(void) { for (int n = 0; n < ITER; n++) { run_os_threads(THREADS, &leak); mi_collect(false); @@ -242,15 +242,15 @@ int main(int argc, char** argv) { // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. srand(0x7feb352d); - mi_stats_reset(); + // mi_stats_reset(); #ifdef STRESS test_stress(); #else test_leak(); -#endif +#endif - mi_collect(true); - mi_stats_print(NULL); + // mi_collect(true); + // mi_stats_print(NULL); //bench_end_program(); return 0; } @@ -262,7 +262,7 @@ static void (*thread_entry_fun)(intptr_t) = &stress; #include -static DWORD WINAPI thread_entry(LPVOID param) { +static DWORD WINAPI thread_entry(LPVOID param) { thread_entry_fun((intptr_t)param); return 0; } From ed1c8a203ab0ce9df97919767d01bc3f180ec2f1 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 29 Jan 2020 23:08:12 -0800 Subject: [PATCH 245/293] improve performance with tls recursion counter --- include/mimalloc-internal.h | 19 +++++++++++-------- src/init.c | 23 +++++++++++++++-------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index f4b578f6..b2e57aec 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -275,24 +275,27 @@ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value o extern mi_heap_t _mi_heap_main; // statically allocated main backing heap extern bool _mi_process_is_initialized; +extern mi_decl_thread mi_heap_t* _mi_heap_default; 
// default heap to allocate from #ifdef MI_TLS_RECURSE_GUARD extern mi_heap_t* _mi_get_default_heap_tls_safe(void); +extern size_t _mi_tls_recurse; +#endif + static inline mi_heap_t* mi_get_default_heap(void) { + #ifdef MI_TLS_RECURSE_GUARD + if (_mi_tls_recurse++>100) { // on some BSD platforms, like macOS, the dynamic loader calls `malloc` // to initialize thread local data. To avoid recursion, we need to avoid // accessing the thread local `_mi_default_heap` until our module is loaded // and use the statically allocated main heap until that time. // TODO: patch ourselves dynamically to avoid this check every time? - return _mi_get_default_heap_tls_safe(); -#else - -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from - -static inline mi_heap_t* mi_get_default_heap(void) { + mi_heap_t* heap = _mi_get_default_heap_tls_safe(); + _mi_tls_recurse = 0; + return heap; + } + #endif return _mi_heap_default; - -#endif } static inline bool mi_heap_is_default(const mi_heap_t* heap) { diff --git a/src/init.c b/src/init.c index 922b7438..750be169 100644 --- a/src/init.c +++ b/src/init.c @@ -266,9 +266,8 @@ static void _mi_thread_done(mi_heap_t* default_heap); #endif // Set up handlers so `mi_thread_done` is called automatically -static bool tls_initialized = false; // fine if it races - static void mi_process_setup_auto_thread_done(void) { + static bool tls_initialized = false; // fine if it races if (tls_initialized) return; tls_initialized = true; #if defined(_WIN32) && defined(MI_SHARED_LIB) @@ -319,9 +318,6 @@ static void _mi_thread_done(mi_heap_t* heap) { void _mi_heap_set_default_direct(mi_heap_t* heap) { mi_assert_internal(heap != NULL); - #ifndef MI_TLS_RECURSE_GUARD - _mi_heap_default = heap; - #endif // ensure the default heap is passed to `_mi_thread_done` // setting to a non-NULL value also ensures `mi_thread_done` is called. 
@@ -332,8 +328,18 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { #elif defined(MI_USE_PTHREADS) pthread_setspecific(mi_pthread_key, heap); #endif + if (_mi_tls_recurse < 100) { + _mi_heap_default = heap; + } } +#ifdef MI_TLS_RECURSE_GUARD +// initialize high so the first call uses safe TLS +size_t _mi_tls_recurse = 10000; +#else +size_t _mi_tls_recurse = 0; +#endif + mi_heap_t* _mi_get_default_heap_tls_safe(void) { if (mi_unlikely(mi_pthread_key==0)) return (mi_heap_t*)&_mi_heap_empty; mi_heap_t* heap = pthread_getspecific(mi_pthread_key); @@ -347,7 +353,6 @@ static void mi_process_done(void); static bool os_preloading = true; // true until this module is initialized static bool mi_redirected = false; // true if malloc redirects to mi_malloc -bool _mi_tls_initialized = false; // Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. bool _mi_preloading() { @@ -395,7 +400,7 @@ static void mi_process_load(void) { volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; UNUSED(dummy); os_preloading = false; - _mi_tls_initialized = true; + _mi_heap_set_default_direct(&_mi_heap_main); atexit(&mi_process_done); _mi_options_init(); mi_process_init(); @@ -414,7 +419,9 @@ void _mi_heap_main_init(void) { if (_mi_heap_main.cookie == 0) { _mi_heap_main.thread_id = _mi_thread_id(); _mi_heap_main.cookie = _os_random_weak((uintptr_t)&_mi_heap_main_init); - _mi_random_init(&_mi_heap_main.random); + } + if (_mi_tls_recurse < 100) { + _mi_random_init(&_mi_heap_main.random); _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main); } From a7c69ccbeaa92fe792fe4ff6c11e79076ed3aa5d Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 30 Jan 2020 06:25:42 -0800 Subject: [PATCH 246/293] fix stat accounting of segments with huge blocks --- include/mimalloc-internal.h | 1 + src/alloc.c | 29 
+---------------------------- src/segment.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 6fca06b8..f18e459b 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -81,6 +81,7 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); void _mi_segment_thread_collect(mi_segments_tld_t* tld); uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page +void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc; diff --git a/src/alloc.c b/src/alloc.c index 3f577f2f..990bcf8b 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -176,33 +176,6 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block // Free // ------------------------------------------------------ -// free huge block from another thread -static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { - // huge page segments are always abandoned and can be freed immediately - mi_assert_internal(segment->page_kind==MI_PAGE_HUGE); - mi_assert_internal(segment == _mi_page_segment(page)); - mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); - - // claim it and free - mi_heap_t* heap = mi_get_default_heap(); - // paranoia: if this it the last reference, the cas should always succeed - if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) { - mi_block_set_next(page, block, page->free); - page->free = block; - page->used--; - page->is_zero = false; - mi_assert(page->used == 0); - mi_tld_t* tld = heap->tld; - const size_t bsize = 
mi_page_block_size(page); - if (bsize > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&tld->stats.giant, bsize); - } - else { - _mi_stat_decrease(&tld->stats.huge, bsize); - } - _mi_segment_page_free(page, true, &tld->segments); - } -} // multi-threaded free static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) @@ -210,7 +183,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // huge page segments are always abandoned and can be freed immediately mi_segment_t* segment = _mi_page_segment(page); if (segment->page_kind==MI_PAGE_HUGE) { - mi_free_huge_block_mt(segment, page, block); + _mi_segment_huge_page_free(segment, page, block); return; } diff --git a/src/segment.c b/src/segment.c index 3914d770..25941354 100644 --- a/src/segment.c +++ b/src/segment.c @@ -461,7 +461,6 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { if (tld->current_size > tld->peak_size) tld->peak_size = tld->current_size; } - static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) { segment->thread_id = 0; mi_segments_track_size(-((long)segment_size),tld); @@ -1039,11 +1038,41 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld if (segment == NULL) return NULL; mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= size); segment->thread_id = 0; // huge pages are immediately abandoned + mi_segments_track_size(-(long)segment->segment_size, tld); mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); return page; } +// free huge block from another thread +void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { + // huge page segments are always abandoned and can be freed immediately by any thread + mi_assert_internal(segment->page_kind==MI_PAGE_HUGE); + mi_assert_internal(segment == _mi_page_segment(page)); + mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); + + // claim it and free + mi_heap_t* heap = mi_get_default_heap(); + // paranoia: if this it the last reference, the cas should always succeed + if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) { + mi_block_set_next(page, block, page->free); + page->free = block; + page->used--; + page->is_zero = false; + mi_assert(page->used == 0); + mi_segments_tld_t* tld = &heap->tld->segments; + const size_t bsize = mi_page_block_size(page); + if (bsize > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&tld->stats->giant, bsize); + } + else { + _mi_stat_decrease(&tld->stats->huge, bsize); + } + mi_segments_track_size((long)segment->segment_size, tld); + _mi_segment_page_free(page, true, tld); + } +} + /* ----------------------------------------------------------- Page allocation and free ----------------------------------------------------------- */ From 4531367de2bf551d5912bb612fd6b0c59a5bf849 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 31 Jan 2020 13:20:02 -0800 Subject: [PATCH 247/293] fix padding check for aligned allocation; improve perf for small aligned allocations --- include/mimalloc-types.h | 15 ++++++---- src/alloc-aligned.c | 8 ++++-- src/alloc-posix.c | 13 ++++++--- src/alloc.c | 60 +++++++++++++++++++--------------------- src/options.c | 4 +-- 5 files changed, 53 insertions(+), 47 
deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 39debae1..9cda377e 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -54,16 +54,19 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_ENCODE_FREELIST 1 #endif -// Reserve extra padding at the end of each block; must be a multiple of `sizeof(intptr_t)`! +// Reserve extra padding at the end of each block; must be a multiple of `2*sizeof(intptr_t)`! // If free lists are encoded, the padding is checked if it was modified on free. -#if (!defined(MI_PADDING)) -#if (MI_SECURE>=3 || MI_DEBUG>=1) -#define MI_PADDING MI_MAX_ALIGN_SIZE +#if (!defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1)) +#define MI_PADDING +#endif + +#if defined(MI_PADDING) +#define MI_PADDING_SIZE (2*sizeof(intptr_t)) #else -#define MI_PADDING 0 -#endif +#define MI_PADDING_SIZE 0 #endif + // ------------------------------------------------------ // Platform specific values // ------------------------------------------------------ diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 55b0e041..3749fbc6 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -18,20 +18,22 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t // note: we don't require `size > offset`, we just guarantee that // the address at offset is aligned regardless of the allocated size. 
mi_assert(alignment > 0 && alignment % sizeof(void*) == 0); + + if (alignment <= MI_MAX_ALIGN_SIZE && offset==0) return _mi_heap_malloc_zero(heap, size, zero); if (mi_unlikely(size > PTRDIFF_MAX)) return NULL; // we don't allocate more than PTRDIFF_MAX (see ) if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) return NULL; // require power-of-two (see ) const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` // try if there is a small block available with just the right alignment - if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { - mi_page_t* page = _mi_heap_get_free_small_page(heap,size); + if (mi_likely(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE))) { + mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE); const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; if (mi_likely(page->free != NULL && is_aligned)) { #if MI_STAT>1 mi_heap_stat_increase( heap, malloc, size); #endif - void* p = _mi_page_malloc(heap,page,size); // TODO: inline _mi_page_malloc + void* p = _mi_page_malloc(heap,page,size + MI_PADDING_SIZE); // TODO: inline _mi_page_malloc mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); if (zero) _mi_block_zero_init(page,p,size); diff --git a/src/alloc-posix.c b/src/alloc-posix.c index 505e42e4..ade8cc48 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -47,16 +47,19 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept // Note: The spec dictates we should not modify `*p` on an error. (issue#27) // if (p == NULL) return EINVAL; - if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment + if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment if (!_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2 - void* q = mi_malloc_aligned(size, alignment); + void* q = (alignment <= MI_MAX_ALIGN_SIZE ? 
mi_malloc(size) : mi_malloc_aligned(size, alignment)); if (q==NULL && size != 0) return ENOMEM; + mi_assert_internal(((uintptr_t)q % alignment) == 0); *p = q; return 0; } void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept { - return mi_malloc_aligned(size, alignment); + void* p = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment)); + mi_assert_internal(((uintptr_t)p % alignment) == 0); + return p; } void* mi_valloc(size_t size) mi_attr_noexcept { @@ -73,7 +76,9 @@ void* mi_pvalloc(size_t size) mi_attr_noexcept { void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept { if (alignment==0 || !_mi_is_power_of_two(alignment)) return NULL; if ((size&(alignment-1)) != 0) return NULL; // C11 requires integral multiple, see - return mi_malloc_aligned(size, alignment); + void* p = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment)); + mi_assert_internal(((uintptr_t)p % alignment) == 0); + return p; } void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD diff --git a/src/alloc.c b/src/alloc.c index 6852d652..34e65765 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -43,9 +43,9 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz mi_heap_stat_increase(heap,normal[bin], 1); } #endif -#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) - mi_assert_internal((MI_PADDING % sizeof(mi_block_t*)) == 0); - mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING); +#if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST) + mi_assert_internal((MI_PADDING_SIZE % sizeof(mi_block_t*)) == 0); + mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING_SIZE); mi_block_set_nextx(page, padding, block, page->key[0], page->key[1]); #endif return block; @@ -53,39 +53,27 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz // allocate a small 
block extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { - mi_assert(size <= MI_SMALL_SIZE_MAX); - mi_page_t* page = _mi_heap_get_free_small_page(heap,size); - return _mi_page_malloc(heap, page, size); + mi_assert(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE)); + mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE); + void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE); + mi_assert_internal(p==NULL || mi_page_block_size(_mi_ptr_page(p)) >= (size + MI_PADDING_SIZE)); + return p; } extern inline mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept { -#if (MI_PADDING>0) - size += MI_PADDING; -#endif return mi_heap_malloc_small(mi_get_default_heap(), size); } - -// zero initialized small block -mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept { - void* p = mi_malloc_small(size); - if (p != NULL) { memset(p, 0, size); } - return p; -} - // The main allocation function extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { mi_assert(heap!=NULL); mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local -#if (MI_PADDING>0) - size += MI_PADDING; -#endif void* p; - if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { + if (mi_likely(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE))) { p = mi_heap_malloc_small(heap, size); } else { - p = _mi_malloc_generic(heap, size); + p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); } #if MI_STAT>1 if (p != NULL) { @@ -93,6 +81,7 @@ extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t siz mi_heap_stat_increase( heap, malloc, mi_good_size(size) ); // overestimate for aligned sizes } #endif + mi_assert_internal(p == NULL || mi_page_block_size(_mi_ptr_page(p)) >= (size + MI_PADDING_SIZE)); return p; } @@ -100,24 +89,34 @@ extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept { return 
mi_heap_malloc(mi_get_default_heap(), size); } + void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { // note: we need to initialize the whole block to zero, not just size // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) UNUSED_RELEASE(size); mi_assert_internal(p != NULL); - mi_assert_internal(mi_page_block_size(page) >= size); // size can be zero + mi_assert_internal(mi_page_block_size(page) >= (size + MI_PADDING_SIZE)); // size can be zero mi_assert_internal(_mi_ptr_page(p)==page); if (page->is_zero) { // already zero initialized memory? ((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page) - MI_PADDING)); + mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page) - MI_PADDING_SIZE)); } else { // otherwise memset - memset(p, 0, mi_page_block_size(page) - MI_PADDING); + memset(p, 0, mi_page_block_size(page) - MI_PADDING_SIZE); } } +// zero initialized small block +mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept { + void* p = mi_malloc_small(size); + if (p != NULL) { + _mi_block_zero_init(_mi_ptr_page(p), p, size); // todo: can we avoid getting the page again? 
+ } + return p; +} + void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) { void* p = mi_heap_malloc(heap,size); if (zero && p != NULL) { @@ -182,9 +181,9 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block } #endif -#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) +#if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST) static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING); + mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING_SIZE); mi_block_t* const decoded = mi_block_nextx(page, padding, page->key[0], page->key[1]); if (decoded != block) { _mi_error_message(EFAULT, "buffer overflow in heap block %p: write after %zu bytes\n", block, page->xblock_size); @@ -285,7 +284,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) { #if (MI_DEBUG) - memset(block, MI_DEBUG_FREED, mi_page_block_size(page) - MI_PADDING); + memset(block, MI_DEBUG_FREED, mi_page_block_size(page) - MI_PADDING_SIZE); #endif // and push it on the free list @@ -411,10 +410,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept { if (p==NULL) return 0; const mi_segment_t* segment = _mi_ptr_segment(p); const mi_page_t* page = _mi_segment_page_of(segment, p); - size_t size = mi_page_block_size(page); -#if defined(MI_PADDING) - size -= MI_PADDING; -#endif + size_t size = mi_page_block_size(page) - MI_PADDING_SIZE; if (mi_unlikely(mi_page_has_aligned(page))) { ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); diff --git a/src/options.c b/src/options.c index 7559a4b5..0484c183 100644 --- a/src/options.c +++ b/src/options.c @@ -67,10 +67,10 @@ static mi_option_desc_t options[_mi_option_last] = { 0, 
UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free + { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose From 724602b78b1c4a7896c8b615cddbe43358f27801 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 31 Jan 2020 17:27:45 -0800 Subject: [PATCH 248/293] enable page-reset by default --- src/options.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/options.c b/src/options.c index 0484c183..7559a4b5 100644 --- a/src/options.c +++ b/src/options.c @@ -67,10 +67,10 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates 
{ 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose From 8422ab125da114e8cad967889860cc9943b8cca0 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 31 Jan 2020 17:28:26 -0800 Subject: [PATCH 249/293] improve messages; fix reset size calculation on large pages --- src/arena.c | 2 +- src/os.c | 4 ++-- src/segment.c | 10 +++++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index 7bf8099b..724fc52c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -283,7 +283,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; } - _mi_verbose_message("reserved %zu gb huge pages on numa node %i (of the %zu gb requested)\n", pages_reserved, numa_node, pages); + _mi_verbose_message("numa node %i: reserved %zu gb huge pages (of the %zu gb requested)\n", numa_node, pages_reserved, pages); size_t bcount = mi_block_count_of_size(hsize); size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); diff --git a/src/os.c b/src/os.c index b8dfaa70..970eeb94 100644 --- a/src/os.c +++ b/src/os.c @@ -851,7 +851,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) else { // fall back to regular large pages mi_huge_pages_available = false; // don't try further huge pages - _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead 
(status 0x%lx)\n", err); + _mi_warning_message("unable to allocate using huge (1gb) pages, trying large (2mb) pages instead (status 0x%lx)\n", err); } } // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation @@ -892,7 +892,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) // see: long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { - _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); + _mi_warning_message("failed to bind huge (1gb) pages to numa node %d: %s\n", numa_node, strerror(errno)); } } return p; diff --git a/src/segment.c b/src/segment.c index c7a9662b..01a8a693 100644 --- a/src/segment.c +++ b/src/segment.c @@ -247,6 +247,7 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { mi_assert_internal(page->is_reset); + mi_assert_internal(page->is_committed); mi_assert_internal(!segment->mem_is_fixed); page->is_reset = false; size_t psize; @@ -779,10 +780,14 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a // note: must come after setting `segment_in_use` to false but before block_size becomes 0 //mi_page_reset(segment, page, 0 /*used_size*/, tld); - // zero the page data, but not the segment fields and block_size (for page size calculations) + // zero the page data, but not the segment fields and capacity, and block_size (for page size calculations) uint32_t block_size = page->xblock_size; + uint16_t capacity = page->capacity; + uint16_t reserved = page->reserved; ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + page->capacity = capacity; + page->reserved = reserved; page->xblock_size = block_size; segment->used--; @@ -790,6 +795,9 @@ static void 
mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a if (allow_reset) { mi_pages_reset_add(segment, page, tld); } + + page->capacity = 0; // after reset there can be zero'd now + page->reserved = 0; } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) From 68112a2751d4b4388d91381fce3afb79e3c00eec Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 31 Jan 2020 20:34:24 -0800 Subject: [PATCH 250/293] better padding implementation, more precise statistics --- include/mimalloc-internal.h | 12 ++++- include/mimalloc-types.h | 28 +++++----- src/alloc-aligned.c | 2 +- src/alloc.c | 102 ++++++++++++++++++++---------------- src/page.c | 6 +-- test/main-override-static.c | 2 +- test/test-stress.c | 2 +- 7 files changed, 89 insertions(+), 65 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index c7d7a1da..2c8d767c 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -310,8 +310,10 @@ static inline uintptr_t _mi_ptr_cookie(const void* p) { ----------------------------------------------------------- */ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) { - mi_assert_internal(size <= MI_SMALL_SIZE_MAX); - return heap->pages_free_direct[_mi_wsize_from_size(size)]; + mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE)); + const size_t idx = _mi_wsize_from_size(size); + mi_assert_internal(idx < MI_PAGES_DIRECT); + return heap->pages_free_direct[idx]; } // Get the page belonging to a certain size class @@ -375,6 +377,12 @@ static inline size_t mi_page_block_size(const mi_page_t* page) { } } +// Get the client usable block size of a page (without padding etc) +static inline size_t mi_page_usable_block_size(const mi_page_t* page) { + return mi_page_block_size(page) - MI_PADDING_SIZE; +} + + // Thread free access static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { return 
(mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 9cda377e..8712c54a 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -54,16 +54,17 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_ENCODE_FREELIST 1 #endif -// Reserve extra padding at the end of each block; must be a multiple of `2*sizeof(intptr_t)`! +// Reserve extra padding at the end of each block to be more resilient against heap block overflows. // If free lists are encoded, the padding is checked if it was modified on free. #if (!defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1)) -#define MI_PADDING +#define MI_PADDING #endif +// The padding size must be at least `sizeof(intptr_t)`! #if defined(MI_PADDING) -#define MI_PADDING_SIZE (2*sizeof(intptr_t)) +#define MI_PADDING_WSIZE 1 #else -#define MI_PADDING_SIZE 0 +#define MI_PADDING_WSIZE 0 #endif @@ -94,11 +95,13 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_INTPTR_SIZE (1<free+offset) & align_mask)==0; if (mi_likely(page->free != NULL && is_aligned)) diff --git a/src/alloc.c b/src/alloc.c index 34e65765..999a6ca5 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -38,14 +38,15 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz block->next = 0; // don't leak internal data #endif #if (MI_STAT>1) - if(size <= MI_LARGE_OBJ_SIZE_MAX) { - size_t bin = _mi_bin(size); + const size_t bsize = mi_page_usable_block_size(page); + if(bsize <= MI_LARGE_OBJ_SIZE_MAX) { + const size_t bin = _mi_bin(bsize); mi_heap_stat_increase(heap,normal[bin], 1); } #endif #if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST) mi_assert_internal((MI_PADDING_SIZE % sizeof(mi_block_t*)) == 0); - mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING_SIZE); + mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + mi_page_usable_block_size(page)); mi_block_set_nextx(page, padding, block, page->key[0], page->key[1]); #endif return block; @@ -53,10 +54,18 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz // allocate a small block extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { - mi_assert(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE)); + mi_assert(heap!=NULL); + mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local + mi_assert(size <= MI_SMALL_SIZE_MAX); mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE); void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE); - mi_assert_internal(p==NULL || mi_page_block_size(_mi_ptr_page(p)) >= (size + MI_PADDING_SIZE)); + mi_assert_internal(p==NULL || mi_usable_size(p) >= size); + #if MI_STAT>1 + if (p != NULL) { + if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + mi_heap_stat_increase(heap, malloc, 
mi_usable_size(p)); + } + #endif return p; } @@ -66,23 +75,22 @@ extern inline mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexc // The main allocation function extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { - mi_assert(heap!=NULL); - mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local - void* p; - if (mi_likely(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE))) { - p = mi_heap_malloc_small(heap, size); + if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { + return mi_heap_malloc_small(heap, size); } else { - p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); + mi_assert(heap!=NULL); + mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local + void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); + mi_assert_internal(p == NULL || mi_usable_size(p) >= size); + #if MI_STAT>1 + if (p != NULL) { + if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); + } + #endif + return p; } - #if MI_STAT>1 - if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } - mi_heap_stat_increase( heap, malloc, mi_good_size(size) ); // overestimate for aligned sizes - } - #endif - mi_assert_internal(p == NULL || mi_page_block_size(_mi_ptr_page(p)) >= (size + MI_PADDING_SIZE)); - return p; } extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept { @@ -91,20 +99,20 @@ extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept { void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { - // note: we need to initialize the whole block to zero, not just size + // note: we need to initialize the whole usable block size to zero, not just the requested size, // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) UNUSED_RELEASE(size); mi_assert_internal(p 
!= NULL); - mi_assert_internal(mi_page_block_size(page) >= (size + MI_PADDING_SIZE)); // size can be zero + mi_assert_internal(mi_usable_size(p) >= size); // size can be zero mi_assert_internal(_mi_ptr_page(p)==page); if (page->is_zero) { // already zero initialized memory? ((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page) - MI_PADDING_SIZE)); + mi_assert_expensive(mi_mem_is_zero(p, mi_page_usable_block_size(page))); } else { // otherwise memset - memset(p, 0, mi_page_block_size(page) - MI_PADDING_SIZE); + memset(p, 0, mi_page_usable_block_size(page)); } } @@ -183,10 +191,11 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block #if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST) static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING_SIZE); + mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + mi_page_usable_block_size(page)); mi_block_t* const decoded = mi_block_nextx(page, padding, page->key[0], page->key[1]); if (decoded != block) { - _mi_error_message(EFAULT, "buffer overflow in heap block %p: write after %zu bytes\n", block, page->xblock_size); + const ptrdiff_t size = (uint8_t*)padding - (uint8_t*)block; + _mi_error_message(EFAULT, "buffer overflow in heap block %p: write after %zd bytes\n", block, size ); } } #else @@ -208,7 +217,7 @@ static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_pag mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); // claim it and free - mi_heap_t* heap = mi_get_default_heap(); + mi_heap_t* const heap = mi_get_default_heap(); // paranoia: if this it the last reference, the cas should always succeed if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) { mi_block_set_next(page, block, page->free); @@ -216,8 +225,8 @@ static mi_decl_noinline void 
mi_free_huge_block_mt(mi_segment_t* segment, mi_pag page->used--; page->is_zero = false; mi_assert(page->used == 0); - mi_tld_t* tld = heap->tld; - const size_t bsize = mi_page_block_size(page); + mi_tld_t* const tld = heap->tld; + const size_t bsize = mi_page_usable_block_size(page); if (bsize > MI_HUGE_OBJ_SIZE_MAX) { _mi_stat_decrease(&tld->stats.giant, bsize); } @@ -232,14 +241,17 @@ static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_pag static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { // huge page segments are always abandoned and can be freed immediately - mi_segment_t* segment = _mi_page_segment(page); + mi_segment_t* const segment = _mi_page_segment(page); if (segment->page_kind==MI_PAGE_HUGE) { mi_free_huge_block_mt(segment, page, block); return; } + // The padding check accesses the non-thread-owned page for the key values. + // that is safe as these are constant and the page won't be freed (as the block is not freed yet). mi_check_padding(page, block); + // Try to put the block on either the page-local thread free list, or the heap delayed free list. mi_thread_free_t tfree; mi_thread_free_t tfreex; bool use_delayed; @@ -259,7 +271,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc if (mi_unlikely(use_delayed)) { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* heap = mi_page_heap(page); + mi_heap_t* const heap = mi_page_heap(page); mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) @@ -311,15 +323,15 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block // Adjust a block that was allocated aligned, to the actual start of the block in the page. 
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { mi_assert_internal(page!=NULL && p!=NULL); - size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); - size_t adjust = (diff % mi_page_block_size(page)); + const size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); + const size_t adjust = (diff % mi_page_block_size(page)); return (mi_block_t*)((uintptr_t)p - adjust); } static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) { - mi_page_t* page = _mi_segment_page_of(segment, p); - mi_block_t* block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); + mi_page_t* const page = _mi_segment_page_of(segment, p); + mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); _mi_free_block(page, local, block); } @@ -356,12 +368,12 @@ void mi_free(void* p) mi_attr_noexcept mi_page_t* const page = _mi_segment_page_of(segment, p); #if (MI_STAT>1) - mi_heap_t* heap = mi_heap_get_default(); - mi_heap_stat_decrease(heap, malloc, mi_usable_size(p)); - if (page->xblock_size <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, normal[_mi_bin(page->xblock_size)], 1); - } - // huge page stat is accounted for in `_mi_page_retire` + mi_heap_t* const heap = mi_heap_get_default(); + const size_t bsize = mi_page_usable_block_size(page); + mi_heap_stat_decrease(heap, malloc, bsize); + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { // huge page stats are accounted for in `_mi_page_retire` + mi_heap_stat_decrease(heap, normal[_mi_bin(bsize)], 1); + } #endif if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks @@ -385,10 +397,10 @@ void mi_free(void* p) mi_attr_noexcept bool _mi_free_delayed_block(mi_block_t* block) { // get segment and page - const mi_segment_t* segment = _mi_ptr_segment(block); + 
const mi_segment_t* const segment = _mi_ptr_segment(block); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(_mi_thread_id() == segment->thread_id); - mi_page_t* page = _mi_segment_page_of(segment, block); + mi_page_t* const page = _mi_segment_page_of(segment, block); // Clear the no-delayed flag so delayed freeing is used again for this page. // This must be done before collecting the free lists on this page -- otherwise @@ -408,9 +420,9 @@ bool _mi_free_delayed_block(mi_block_t* block) { // Bytes available in a block size_t mi_usable_size(const void* p) mi_attr_noexcept { if (p==NULL) return 0; - const mi_segment_t* segment = _mi_ptr_segment(p); - const mi_page_t* page = _mi_segment_page_of(segment, p); - size_t size = mi_page_block_size(page) - MI_PADDING_SIZE; + const mi_segment_t* const segment = _mi_ptr_segment(p); + const mi_page_t* const page = _mi_segment_page_of(segment, p); + const size_t size = mi_page_usable_block_size(page); if (mi_unlikely(mi_page_has_aligned(page))) { ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); diff --git a/src/page.c b/src/page.c index edbc7411..57adbc91 100644 --- a/src/page.c +++ b/src/page.c @@ -752,7 +752,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); if (page != NULL) { - const size_t bsize = mi_page_block_size(page); + const size_t bsize = mi_page_usable_block_size(page); mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(bsize >= size); mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE); @@ -761,11 +761,11 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_page_set_heap(page, NULL); if (bsize > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_increase(&heap->tld->stats.giant, block_size); + 
_mi_stat_increase(&heap->tld->stats.giant, bsize); _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1); } else { - _mi_stat_increase(&heap->tld->stats.huge, block_size); + _mi_stat_increase(&heap->tld->stats.huge, bsize); _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1); } } diff --git a/test/main-override-static.c b/test/main-override-static.c index a1c3edee..4bbff192 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -19,7 +19,7 @@ int main() { // double_free1(); // double_free2(); // corrupt_free(); - // block_overflow1(); + //block_overflow1(); void* p1 = malloc(78); void* p2 = malloc(24); diff --git a/test/test-stress.c b/test/test-stress.c index 1b559a59..05254e5d 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -27,7 +27,7 @@ terms of the MIT license. // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors static int SCALE = 10; // scaling factor -static int ITER = 50; // N full iterations destructing and re-creating all threads +static int ITER = 10; // N full iterations destructing and re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor From 40f1e1e07b9452ad46ae47dfb3887e7f5cb6ca4d Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 31 Jan 2020 23:39:51 -0800 Subject: [PATCH 251/293] byte-precise heap block overflow checking with encoded padding --- ide/vs2019/mimalloc.vcxproj | 2 +- include/mimalloc-internal.h | 3 +- include/mimalloc-types.h | 30 +++++--- src/alloc.c | 135 +++++++++++++++++++++++++++--------- src/init.c | 10 ++- test/main-override-static.c | 6 +- test/test-stress.c | 2 +- 7 files changed, 138 insertions(+), 50 deletions(-) diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index a1372204..fad6de5d 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -248,4 +248,4 @@ - + \ No newline at end of file diff --git 
a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 2c8d767c..be10bdc3 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -377,7 +377,8 @@ static inline size_t mi_page_block_size(const mi_page_t* page) { } } -// Get the client usable block size of a page (without padding etc) +// Get the usable block size of a page without fixed padding. +// This may still include internal padding due to alignment and rounding up size classes. static inline size_t mi_page_usable_block_size(const mi_page_t* page) { return mi_page_block_size(page) - MI_PADDING_SIZE; } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 8712c54a..ccb37fcf 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -49,23 +49,17 @@ terms of the MIT license. A copy of the license can be found in the file #endif // Encoded free lists allow detection of corrupted free lists -// and can detect buffer overflows and double `free`s. +// and can detect buffer overflows, modify after free, and double `free`s. #if (MI_SECURE>=3 || MI_DEBUG>=1) #define MI_ENCODE_FREELIST 1 #endif // Reserve extra padding at the end of each block to be more resilient against heap block overflows. -// If free lists are encoded, the padding is checked if it was modified on free. +// If free lists are encoded, the padding can detect byte-precise buffer overflow on free. #if (!defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1)) #define MI_PADDING #endif -// The padding size must be at least `sizeof(intptr_t)`! -#if defined(MI_PADDING) -#define MI_PADDING_WSIZE 1 -#else -#define MI_PADDING_WSIZE 0 -#endif // ------------------------------------------------------ @@ -95,7 +89,6 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_INTPTR_SIZE (1<xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* block = page->free; if (mi_unlikely(block == NULL)) { @@ -29,25 +29,29 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz } mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); // pop from the free list - page->free = mi_block_next(page,block); + page->free = mi_block_next(page, block); page->used++; mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); -#if (MI_DEBUG!=0) +#if (MI_DEBUG>0) if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); } #elif (MI_SECURE!=0) block->next = 0; // don't leak internal data #endif #if (MI_STAT>1) const size_t bsize = mi_page_usable_block_size(page); - if(bsize <= MI_LARGE_OBJ_SIZE_MAX) { + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { const size_t bin = _mi_bin(bsize); - mi_heap_stat_increase(heap,normal[bin], 1); + mi_heap_stat_increase(heap, normal[bin], 1); } #endif #if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST) - mi_assert_internal((MI_PADDING_SIZE % sizeof(mi_block_t*)) == 0); - mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + mi_page_usable_block_size(page)); - mi_block_set_nextx(page, padding, block, page->key[0], page->key[1]); + mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); + ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); + mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); + padding->block = (uint32_t)(((uintptr_t)block >> MI_INTPTR_SHIFT) ^ page->key[0]); + padding->delta = (uint32_t)(delta ^ page->key[1]); + uint8_t* fill = (uint8_t*)padding - delta; + for (ptrdiff_t i = 0; i < delta; i++) { fill[i] = MI_DEBUG_PADDING; } #endif return block; } @@ -101,18 +105,18 @@ extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept { void 
_mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { // note: we need to initialize the whole usable block size to zero, not just the requested size, // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) - UNUSED_RELEASE(size); + UNUSED(size); mi_assert_internal(p != NULL); mi_assert_internal(mi_usable_size(p) >= size); // size can be zero mi_assert_internal(_mi_ptr_page(p)==page); if (page->is_zero) { // already zero initialized memory? ((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p, mi_page_usable_block_size(page))); + mi_assert_expensive(mi_mem_is_zero(p, mi_usable_size(p))); } else { // otherwise memset - memset(p, 0, mi_page_usable_block_size(page)); + memset(p, 0, mi_usable_size(p)); } } @@ -189,20 +193,82 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block } #endif +// --------------------------------------------------------------------------- +// Check for heap block overflow by setting up padding at the end of the block +// --------------------------------------------------------------------------- + #if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST) -static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + mi_page_usable_block_size(page)); - mi_block_t* const decoded = mi_block_nextx(page, padding, page->key[0], page->key[1]); - if (decoded != block) { - const ptrdiff_t size = (uint8_t*)padding - (uint8_t*)block; - _mi_error_message(EFAULT, "buffer overflow in heap block %p: write after %zd bytes\n", block, size ); +static mi_padding_t mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* bsize) { + *bsize = mi_page_usable_block_size(page); + const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); + mi_padding_t pad; + pad.block = padding->block ^ (uint32_t)page->key[0]; + pad.delta = 
padding->delta ^ (uint32_t)page->key[1]; + return pad; +} + +// Return the exact usable size of a block. +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + size_t bsize; + mi_padding_t pad = mi_page_decode_padding(page, block, &bsize); + return bsize - pad.delta; +} + +static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { + size_t bsize; + const mi_padding_t pad = mi_page_decode_padding(page, block, &bsize); + *size = *wrong = bsize; + if ((uint32_t)((uintptr_t)block >> MI_INTPTR_SHIFT) != pad.block) return false; + if (pad.delta > bsize) return false; // can be equal for zero-sized allocation! + *size = bsize - pad.delta; + uint8_t* fill = (uint8_t*)block + bsize - pad.delta; + for (uint32_t i = 0; i < pad.delta; i++) { + if (fill[i] != MI_DEBUG_PADDING) { + *wrong = bsize - pad.delta + i; + return false; + } } + return true; +} + +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + size_t size; + size_t wrong; + if (!mi_verify_padding(page,block,&size,&wrong)) { + _mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong ); + } +} + +// When a non-thread-local block is freed, it becomes part of the thread delayed free +// list that is freed later by the owning heap. If the exact usable size is too small to +// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) +// so it will later not trigger an overflow error in `mi_free_block`. 
+static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + size_t bsize; + mi_padding_t pad = mi_page_decode_padding(page, block, &bsize); + if ((bsize - pad.delta) >= min_size) return; + mi_assert_internal(bsize >= min_size); + ptrdiff_t delta = (bsize - min_size); + mi_assert_internal(delta >= 0 && delta < (ptrdiff_t)bsize); + mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); + padding->delta = (uint32_t)(delta ^ page->key[1]); } #else static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { UNUSED(page); UNUSED(block); } + +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + UNUSED(block); + return mi_page_usable_block_size(page); +} + +static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + UNUSED(page); + UNUSED(block); + UNUSED(min_size); +} #endif // ------------------------------------------------------ @@ -240,6 +306,14 @@ static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_pag // multi-threaded free static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { + // The padding check may access the non-thread-owned page for the key values. + // that is safe as these are constant and the page won't be freed (as the block is not freed yet). 
+ mi_check_padding(page, block); + mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection + #if (MI_DEBUG!=0) + memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + #endif + // huge page segments are always abandoned and can be freed immediately mi_segment_t* const segment = _mi_page_segment(page); if (segment->page_kind==MI_PAGE_HUGE) { @@ -247,10 +321,6 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc return; } - // The padding check accesses the non-thread-owned page for the key values. - // that is safe as these are constant and the page won't be freed (as the block is not freed yet). - mi_check_padding(page, block); - // Try to put the block on either the page-local thread free list, or the heap delayed free list. mi_thread_free_t tfree; mi_thread_free_t tfreex; @@ -295,15 +365,14 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // regular free static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) { - #if (MI_DEBUG) - memset(block, MI_DEBUG_FREED, mi_page_block_size(page) - MI_PADDING_SIZE); - #endif - // and push it on the free list if (mi_likely(local)) { // owning thread can free a block directly if (mi_unlikely(mi_check_is_double_free(page, block))) return; mi_check_padding(page, block); + #if (MI_DEBUG!=0) + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + #endif mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; @@ -312,7 +381,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block } else if (mi_unlikely(mi_page_is_in_full(page))) { _mi_page_unfull(page); - } + } } else { _mi_free_block_mt(page,block); @@ -366,6 +435,7 @@ void mi_free(void* p) mi_attr_noexcept const uintptr_t tid = _mi_thread_id(); mi_page_t* const page = _mi_segment_page_of(segment, p); + mi_block_t* const block 
= (mi_block_t*)p; #if (MI_STAT>1) mi_heap_t* const heap = mi_heap_get_default(); @@ -377,16 +447,18 @@ void mi_free(void* p) mi_attr_noexcept #endif if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks - // local, and not full or aligned - mi_block_t* const block = (mi_block_t*)p; + // local, and not full or aligned if (mi_unlikely(mi_check_is_double_free(page,block))) return; mi_check_padding(page, block); + #if (MI_DEBUG!=0) + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + #endif mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); - } + } } else { // non-local, aligned blocks, or a full page; use the more generic path @@ -422,9 +494,10 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept { if (p==NULL) return 0; const mi_segment_t* const segment = _mi_ptr_segment(p); const mi_page_t* const page = _mi_segment_page_of(segment, p); - const size_t size = mi_page_usable_block_size(page); + const mi_block_t* const block = (const mi_block_t*)p; + const size_t size = mi_page_usable_size_of(page, block); if (mi_unlikely(mi_page_has_aligned(page))) { - ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); + ptrdiff_t const adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); return (size - adjust); } diff --git a/src/init.c b/src/init.c index f8411187..c657fa4c 100644 --- a/src/init.c +++ b/src/init.c @@ -31,8 +31,14 @@ const mi_page_t _mi_page_empty = { }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) -#define MI_SMALL_PAGES_EMPTY \ - { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } + +#if defined(MI_PADDING) && (MI_INTPTR_SIZE >= 8) +#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } +#elif 
defined(MI_PADDING) +#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } +#else +#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() } +#endif // Empty page queues for every bin diff --git a/test/main-override-static.c b/test/main-override-static.c index 4bbff192..839a5d2f 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -19,7 +19,7 @@ int main() { // double_free1(); // double_free2(); // corrupt_free(); - //block_overflow1(); + // block_overflow1(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -44,8 +44,8 @@ int main() { } static void block_overflow1() { - void* p = mi_malloc(16); - memset(p, 0, 17); + uint8_t* p = (uint8_t*)mi_malloc(17); + p[18] = 0; free(p); } diff --git a/test/test-stress.c b/test/test-stress.c index 05254e5d..1b559a59 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -27,7 +27,7 @@ terms of the MIT license. // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors static int SCALE = 10; // scaling factor -static int ITER = 10; // N full iterations destructing and re-creating all threads +static int ITER = 50; // N full iterations destructing and re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor From aa68b8cbc7830bebbaec98f8c851a5f358993614 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 1 Feb 2020 12:15:12 -0800 Subject: [PATCH 252/293] improve encoding of padding canary and buffer overflow detection --- include/mimalloc-internal.h | 33 ++++++++++++++--------- include/mimalloc-types.h | 25 +++++++++-------- src/alloc.c | 54 ++++++++++++++++++++----------------- src/heap.c | 6 ++--- src/init.c | 12 ++++----- src/page.c | 14 +++++----- 6 files changed, 78 insertions(+), 66 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index be10bdc3..9bba6e8f 100644 --- 
a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -519,30 +519,37 @@ static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { return ((x >> shift) | (x << (MI_INTPTR_BITS - shift))); } -static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) { +static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) { + void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]); + return (mi_unlikely(p==null) ? NULL : p); +} + +static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) { + uintptr_t x = (uintptr_t)(mi_unlikely(p==NULL) ? null : p); + return mi_rotl(x ^ keys[1], keys[0]) + keys[0]; +} + +static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) { #ifdef MI_ENCODE_FREELIST - mi_block_t* b = (mi_block_t*)(mi_rotr(block->next - key1, key1) ^ key2); - if (mi_unlikely((void*)b==null)) { b = NULL; } - return b; + return (mi_block_t*)mi_ptr_decode(null, block->next, keys); #else - UNUSED(key1); UNUSED(key2); UNUSED(null); + UNUSED(keys); UNUSED(null); return (mi_block_t*)block->next; #endif } -static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) { +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { #ifdef MI_ENCODE_FREELIST - if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } - block->next = mi_rotl((uintptr_t)next ^ key2, key1) + key1; + block->next = mi_ptr_encode(null, next, keys); #else - UNUSED(key1); UNUSED(key2); UNUSED(null); + UNUSED(keys); UNUSED(null); block->next = (mi_encoded_t)next; #endif } static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { #ifdef MI_ENCODE_FREELIST - mi_block_t* next = mi_block_nextx(page,block,page->key[0],page->key[1]); + 
mi_block_t* next = mi_block_nextx(page,block,page->keys); // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { @@ -552,16 +559,16 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* return next; #else UNUSED(page); - return mi_block_nextx(page,block,0,0); + return mi_block_nextx(page,block,NULL); #endif } static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { #ifdef MI_ENCODE_FREELIST - mi_block_set_nextx(page,block,next, page->key[0], page->key[1]); + mi_block_set_nextx(page,block,next, page->keys); #else UNUSED(page); - mi_block_set_nextx(page,block, next,0,0); + mi_block_set_nextx(page,block,next,NULL); #endif } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index ccb37fcf..71f3ae80 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -48,25 +48,24 @@ terms of the MIT license. A copy of the license can be found in the file #endif #endif +// Reserve extra padding at the end of each block to be more resilient against heap block overflows. +// The padding can detect byte-precise buffer overflow on free. +#if !defined(MI_PADDING) && (MI_DEBUG>=1) +#define MI_PADDING 1 +#endif + + // Encoded free lists allow detection of corrupted free lists // and can detect buffer overflows, modify after free, and double `free`s. -#if (MI_SECURE>=3 || MI_DEBUG>=1) +#if (MI_SECURE>=3 || MI_DEBUG>=1 || defined(MI_PADDING)) #define MI_ENCODE_FREELIST 1 #endif -// Reserve extra padding at the end of each block to be more resilient against heap block overflows. -// If free lists are encoded, the padding can detect byte-precise buffer overflow on free. 
-#if (!defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1)) -#define MI_PADDING -#endif - - // ------------------------------------------------------ // Platform specific values // ------------------------------------------------------ - // ------------------------------------------------------ // Size of a pointer. // We assume that `sizeof(void*)==sizeof(intptr_t)` @@ -218,7 +217,7 @@ typedef struct mi_page_s { mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #ifdef MI_ENCODE_FREELIST - uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`) + uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) uint32_t xblock_size; // size available in each block (always `>0`) @@ -306,8 +305,8 @@ typedef struct mi_random_cxt_s { // In debug mode there is a padding stucture at the end of the blocks to check for buffer overflows #if defined(MI_PADDING) typedef struct mi_padding_s { - uint32_t block; // (encoded) lower 32 bits of the block address. (to check validity of the block) - uint32_t delta; // (encoded) padding bytes before the block. (mi_usable_size(p) - decode(delta) == exact allocated bytes) + uint32_t canary; // encoded block value to check validity of the padding (in case of overflow) + uint32_t delta; // padding bytes before the block. 
(mi_usable_size(p) - delta == exact allocated bytes) } mi_padding_t; #define MI_PADDING_SIZE (sizeof(mi_padding_t)) #define MI_PADDING_WSIZE ((MI_PADDING_SIZE + MI_INTPTR_SIZE - 1) / MI_INTPTR_SIZE) @@ -327,7 +326,7 @@ struct mi_heap_s { volatile _Atomic(mi_block_t*) thread_delayed_free; uintptr_t thread_id; // thread this heap belongs too uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) - uintptr_t key[2]; // two random keys used to encode the `thread_delayed_free` list + uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation size_t page_count; // total number of pages in the `pages` queues. bool no_reclaim; // `true` if this heap should not reclaim abandoned pages diff --git a/src/alloc.c b/src/alloc.c index 54057661..134f5b85 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -48,10 +48,11 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); - padding->block = (uint32_t)(((uintptr_t)block >> MI_INTPTR_SHIFT) ^ page->key[0]); - padding->delta = (uint32_t)(delta ^ page->key[1]); + padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); + padding->delta = (uint32_t)(delta); uint8_t* fill = (uint8_t*)padding - delta; - for (ptrdiff_t i = 0; i < delta; i++) { fill[i] = MI_DEBUG_PADDING; } + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? 
MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes + for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } #endif return block; } @@ -175,7 +176,7 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field + mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? { @@ -198,33 +199,35 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block // --------------------------------------------------------------------------- #if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST) -static mi_padding_t mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* bsize) { +static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { *bsize = mi_page_usable_block_size(page); const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); - mi_padding_t pad; - pad.block = padding->block ^ (uint32_t)page->key[0]; - pad.delta = padding->delta ^ (uint32_t)page->key[1]; - return pad; + *delta = padding->delta; + return ((uint32_t)mi_ptr_encode(page,block,page->keys) == padding->canary && *delta <= *bsize); } // Return the exact usable size of a block. 
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { size_t bsize; - mi_padding_t pad = mi_page_decode_padding(page, block, &bsize); - return bsize - pad.delta; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); mi_assert_internal(delta <= bsize); + return (ok ? bsize - delta : 0); } static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { size_t bsize; - const mi_padding_t pad = mi_page_decode_padding(page, block, &bsize); + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); *size = *wrong = bsize; - if ((uint32_t)((uintptr_t)block >> MI_INTPTR_SHIFT) != pad.block) return false; - if (pad.delta > bsize) return false; // can be equal for zero-sized allocation! - *size = bsize - pad.delta; - uint8_t* fill = (uint8_t*)block + bsize - pad.delta; - for (uint32_t i = 0; i < pad.delta; i++) { + if (!ok) return false; + mi_assert_internal(bsize >= delta); + *size = bsize - delta; + uint8_t* fill = (uint8_t*)block + bsize - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes + for (size_t i = 0; i < maxpad; i++) { if (fill[i] != MI_DEBUG_PADDING) { - *wrong = bsize - pad.delta + i; + *wrong = bsize - delta + i; return false; } } @@ -245,13 +248,16 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { // so it will later not trigger an overflow error in `mi_free_block`. 
static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { size_t bsize; - mi_padding_t pad = mi_page_decode_padding(page, block, &bsize); - if ((bsize - pad.delta) >= min_size) return; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); + if (!ok || (bsize - delta) >= min_size) return; // usually already enough space mi_assert_internal(bsize >= min_size); - ptrdiff_t delta = (bsize - min_size); - mi_assert_internal(delta >= 0 && delta < (ptrdiff_t)bsize); + if (bsize < min_size) return; // should never happen + size_t new_delta = (bsize - min_size); + mi_assert_internal(new_delta < bsize); mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); - padding->delta = (uint32_t)(delta ^ page->key[1]); + padding->delta = (uint32_t)new_delta; } #else static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { @@ -348,7 +354,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_t* dfree; do { dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); - mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]); + mi_block_set_nextx(heap,block,dfree, heap->keys); } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); } diff --git a/src/heap.c b/src/heap.c index e76a147c..1c287db2 100644 --- a/src/heap.c +++ b/src/heap.c @@ -194,9 +194,9 @@ mi_heap_t* mi_heap_new(void) { heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); _mi_random_split(&bheap->random, &heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; - heap->key[0] = _mi_heap_random_next(heap); - heap->key[1] = _mi_heap_random_next(heap); + heap->cookie = _mi_heap_random_next(heap) | 1; + heap->keys[0] = _mi_heap_random_next(heap); + heap->keys[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe return heap; } 
diff --git a/src/init.c b/src/init.c index c657fa4c..fc62880e 100644 --- a/src/init.c +++ b/src/init.c @@ -173,9 +173,9 @@ static bool _mi_heap_init(void) { memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); _mi_random_init(&heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; - heap->key[0] = _mi_heap_random_next(heap); - heap->key[1] = _mi_heap_random_next(heap); + heap->cookie = _mi_heap_random_next(heap) | 1; + heap->keys[0] = _mi_heap_random_next(heap); + heap->keys[1] = _mi_heap_random_next(heap); heap->tld = tld; tld->heap_backing = heap; tld->segments.stats = &tld->stats; @@ -418,9 +418,9 @@ void mi_process_init(void) mi_attr_noexcept { _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); _mi_random_init(&_mi_heap_main.random); #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. - _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); - _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); - _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main); #endif mi_process_setup_auto_thread_done(); _mi_os_init(); diff --git a/src/page.c b/src/page.c index 57adbc91..23a04a84 100644 --- a/src/page.c +++ b/src/page.c @@ -281,7 +281,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // and free them all while(block != NULL) { - mi_block_t* next = mi_block_nextx(heap,block, heap->key[0], heap->key[1]); + mi_block_t* next = mi_block_nextx(heap,block, heap->keys); // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet @@ -289,7 +289,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { mi_block_t* dfree; do { dfree = 
mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); - mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]); + mi_block_set_nextx(heap, block, dfree, heap->keys); } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); } block = next; @@ -348,7 +348,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { #if MI_DEBUG>1 // check there are no references left.. - for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->key[0], pheap->key[1])) { + for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->keys)) { mi_assert_internal(_mi_ptr_page(block) != page); } #endif @@ -609,8 +609,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST - page->key[0] = _mi_heap_random_next(heap); - page->key[1] = _mi_heap_random_next(heap); + page->keys[0] = _mi_heap_random_next(heap); + page->keys[1] = _mi_heap_random_next(heap); #endif page->is_zero = page->is_zero_init; @@ -623,8 +623,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) - mi_assert_internal(page->key[0] != 0); - mi_assert_internal(page->key[1] != 0); + mi_assert_internal(page->keys[0] != 0); + mi_assert_internal(page->keys[1] != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page)); From 60cfc623be8838ca32aad627c13f54aa53c18c5f Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 1 Feb 2020 14:29:12 -0800 Subject: [PATCH 253/293] fix zero initialization of blocks under 8 bytes when padding check is active --- src/alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/alloc.c b/src/alloc.c index 1f053db9..61f34353 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -110,8 +110,8 @@ void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { mi_assert_internal(p != NULL); mi_assert_internal(mi_usable_size(p) >= size); // size can be zero mi_assert_internal(_mi_ptr_page(p)==page); - if (page->is_zero) { - // already zero initialized memory? + if (page->is_zero && size > sizeof(mi_block_t)) { + // already zero initialized memory ((mi_block_t*)p)->next = 0; // clear the free list pointer mi_assert_expensive(mi_mem_is_zero(p, mi_usable_size(p))); } From 5135c2b96a5acd08d2639cf70031f07b08c010f6 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 1 Feb 2020 14:29:55 -0800 Subject: [PATCH 254/293] add test-api to vs solution --- ide/vs2019/mimalloc-test-api.vcxproj | 155 +++++++++++++++++++++++++++ ide/vs2019/mimalloc.sln | 14 ++- 2 files changed, 167 insertions(+), 2 deletions(-) create mode 100644 ide/vs2019/mimalloc-test-api.vcxproj diff --git a/ide/vs2019/mimalloc-test-api.vcxproj b/ide/vs2019/mimalloc-test-api.vcxproj new file mode 100644 index 00000000..812a9cb1 --- /dev/null +++ b/ide/vs2019/mimalloc-test-api.vcxproj @@ -0,0 +1,155 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {FFF7958F-750E-4C21-A04D-22707CC66878} + mimalloc-test-api + 10.0 + mimalloc-test-api + + + + Application + true + v142 + + + Application + false + v142 + true + + + Application + true + v142 + + + Application + false + v142 + true + + + + + + + + + + + + + + + + + + + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + 
$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + + Level3 + Disabled + true + true + ..\..\include + + + Console + + + + + Level3 + Disabled + true + true + ..\..\include + + + Console + + + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + %(PreprocessorDefinitions);NDEBUG + + + true + true + Console + + + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + %(PreprocessorDefinitions);NDEBUG + + + true + true + Console + + + + + + + + + {abb5eae7-b3e6-432e-b636-333449892ea6} + + + + + + diff --git a/ide/vs2019/mimalloc.sln b/ide/vs2019/mimalloc.sln index aeab6b88..fcb938a4 100644 --- a/ide/vs2019/mimalloc.sln +++ b/ide/vs2019/mimalloc.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.28010.2016 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29709.97 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc", "mimalloc.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA6}" EndProject @@ -13,6 +13,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override-test", "m EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test-stress", "mimalloc-test-stress.vcxproj", "{FEF7958F-750E-4C21-A04D-22707CC66878}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test-api", "mimalloc-test-api.vcxproj", "{FFF7958F-750E-4C21-A04D-22707CC66878}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -61,6 +63,14 @@ Global {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64 + 
{FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE From fea903900d7f40c1c9af4f9059dc2fbfaa6a187c Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 1 Feb 2020 14:33:24 -0800 Subject: [PATCH 255/293] use __thread locals on linux --- include/mimalloc-internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 872c5269..7173a189 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -11,7 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-types.h" #if defined(MI_MALLOC_OVERRIDE) -#if defined(__APPLE__) || defined(__linux__) +#if defined(__APPLE__) #include #define MI_TLS_PTHREADS #elif (defined(__OpenBSD__) || defined(__DragonFly__)) From 0989562c2d87aa77f33e590357501fc9d2d485bc Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 1 Feb 2020 16:57:00 -0800 Subject: [PATCH 256/293] add initial fast tls for macOSX --- include/mimalloc-internal.h | 32 ++++++++++++++++++++++++++------ src/init.c | 19 +++++++++++++------ test/test-stress.c | 4 ++-- 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 7173a189..0e3ebed8 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -11,7 +11,10 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc-types.h" #if defined(MI_MALLOC_OVERRIDE) -#if defined(__APPLE__) +#if defined(__APPLE__) && (defined(__i386__) || defined(__x86_64__)) +#define MI_TLS_OSX_FAST +#define MI_TLS_OSX_SLOT 94 // seems unused, except in Webkit? See: +#elif defined(__APPLE__) #include #define MI_TLS_PTHREADS #elif (defined(__OpenBSD__) || defined(__DragonFly__)) @@ -284,14 +287,31 @@ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value o extern mi_heap_t _mi_heap_main; // statically allocated main backing heap extern bool _mi_process_is_initialized; -#if defined(MI_TLS_PTHREADS) +#if defined(MI_TLS_OSX_FAST) +#define MI_TLS_OSX_OFFSET (MI_TLS_OSX_SLOT*sizeof(void*)) +static inline void* mi_tls_osx_fast_get(void) { + void* ret; + __asm__("mov %%gs:%1, %0" : "=r" (ret) : "m" (*(void**)(MI_TLS_OSX_OFFSET))); + return ret; +} +static inline void mi_tls_osx_fast_set(void* value) { + __asm__("movq %1,%%gs:%0" : "=m" (*(void**)(MI_TLS_OSX_OFFSET)) : "rn" (value)); +} +#elif defined(MI_TLS_PTHREADS) extern pthread_key_t _mi_heap_default_key; #else extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from #endif static inline mi_heap_t* mi_get_default_heap(void) { -#if defined(MI_TLS_PTHREADS) +#if defined(MI_TLS_OSX_FAST) + // Use a fixed slot in the TSD on MacOSX to avoid recursion (since the loader calls malloc). + // We use slot 94 (__PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4) + // which seems unused except for the more recent Webkit + // Use with care. + mi_heap_t* heap = (mi_heap_t*)mi_tls_osx_fast_get(); + return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); +#elif defined(MI_TLS_PTHREADS) // Use pthreads for TLS; this is used on macOSX with interpose as the loader calls `malloc` // to allocate TLS storage leading to recursive calls if __thread declared variables are accessed. // Using pthreads allows us to initialize without recursive calls. 
(performance seems still quite good). @@ -300,9 +320,9 @@ static inline mi_heap_t* mi_get_default_heap(void) { #else #if defined(MI_TLS_RECURSE_GUARD) // On some BSD platforms, like openBSD, the dynamic loader calls `malloc` - // to initialize thread local data. To avoid recursion, we need to avoid - // accessing the thread local `_mi_default_heap` until our module is loaded - // and use the statically allocated main heap until that time. + // to initialize thread local data (before our module is loaded). + // To avoid recursion, we need to avoid accessing the thread local `_mi_default_heap` + // until our module is loaded and use the statically allocated main heap until that time. // TODO: patch ourselves dynamically to avoid this check every time? if (mi_unlikely(!_mi_process_is_initialized)) return &_mi_heap_main; #endif diff --git a/src/init.c b/src/init.c index 431b7fee..960cccf1 100644 --- a/src/init.c +++ b/src/init.c @@ -260,14 +260,15 @@ static void _mi_thread_done(mi_heap_t* default_heap); // use thread local storage keys to detect thread ending #include #include - static DWORD mi_fls_key; + static DWORD mi_fls_key = (DWORD)(-1); static void NTAPI mi_fls_done(PVOID value) { if (value!=NULL) _mi_thread_done((mi_heap_t*)value); } #elif defined(MI_USE_PTHREADS) - // use pthread locol storage keys to detect thread ending + // use pthread local storage keys to detect thread ending + // (and used with MI_TLS_PTHREADS for the default heap) #include - pthread_key_t _mi_heap_default_key; + pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); static void mi_pthread_done(void* value) { if (value!=NULL) _mi_thread_done((mi_heap_t*)value); } @@ -287,6 +288,7 @@ static void mi_process_setup_auto_thread_done(void) { #elif defined(_WIN32) && !defined(MI_SHARED_LIB) mi_fls_key = FlsAlloc(&mi_fls_done); #elif defined(MI_USE_PTHREADS) + mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); #endif 
_mi_heap_set_default_direct(&_mi_heap_main); @@ -331,9 +333,14 @@ static void _mi_thread_done(mi_heap_t* heap) { void _mi_heap_set_default_direct(mi_heap_t* heap) { mi_assert_internal(heap != NULL); - #if !defined(MI_TLS_PTHREADS) + #if defined(MI_TLS_OSX_FAST) + mi_tls_osx_fast_set(heap); + #elif defined(MI_TLS_PTHREADS) + // we use _mi_heap_default_key + #else _mi_heap_default = heap; - #endif + #endif + // ensure the default heap is passed to `_mi_thread_done` // setting to a non-NULL value also ensures `mi_thread_done` is called. #if defined(_WIN32) && defined(MI_SHARED_LIB) @@ -342,7 +349,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { mi_assert_internal(mi_fls_key != 0); FlsSetValue(mi_fls_key, heap); #elif defined(MI_USE_PTHREADS) - // mi_assert_internal(_mi_heap_default_key != 0); // often 0 is also the allocated key + mi_assert_internal(_mi_heap_default_key != (pthread_key_t)(-1)); pthread_setspecific(_mi_heap_default_key, heap); #endif } diff --git a/test/test-stress.c b/test/test-stress.c index 1bfc5012..7d8993a0 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -27,7 +27,7 @@ terms of the MIT license. 
// argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors static int SCALE = 10; // scaling factor -static int ITER = 5; // N full iterations destructing and re-creating all threads +static int ITER = 50; // N full iterations destructing and re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor @@ -250,7 +250,7 @@ int main(int argc, char** argv) { #endif // mi_collect(true); - // mi_stats_print(NULL); + mi_stats_print(NULL); //bench_end_program(); return 0; } From 3f17ac287c575e73e30619f970686b7b63951820 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 1 Feb 2020 17:29:30 -0800 Subject: [PATCH 257/293] add 'nodiscard' attribute to mimalloc interface --- include/mimalloc.h | 154 +++++++++++++++++++----------------- src/options.c | 2 +- test/main-override-static.c | 2 +- 3 files changed, 84 insertions(+), 74 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 94fcd788..f94d9ee7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -24,6 +24,16 @@ terms of the MIT license. 
A copy of the license can be found in the file #define mi_attr_noexcept #endif +#if (__cplusplus >= 201703) +#define mi_decl_nodiscard [[nodiscard]] +#elif (__GNUC__ >= 4) // includes clang and icc +#define mi_decl_nodiscard __attribute__((warn_unused_result)) +#elif (_MSC_VER >= 1700) +#define mi_decl_nodiscard _Check_return_ +#else +#define mi_decl_nodiscard +#endif + #ifdef _MSC_VER #if !defined(MI_SHARED_LIB) #define mi_decl_export @@ -85,15 +95,15 @@ extern "C" { // Standard malloc interface // ------------------------------------------------------ -mi_decl_export mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); -mi_decl_export mi_decl_allocator void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_expand(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_expand(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export void mi_free(void* p) mi_attr_noexcept; -mi_decl_export char* mi_strdup(const char* s) mi_attr_noexcept; -mi_decl_export char* mi_strndup(const char* s, size_t n) mi_attr_noexcept; -mi_decl_export char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept; +mi_decl_export void mi_free(void* p) mi_attr_noexcept; 
+mi_decl_nodiscard mi_decl_export char* mi_strdup(const char* s) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export char* mi_strndup(const char* s, size_t n) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept; // ------------------------------------------------------ // Extended functionality @@ -101,16 +111,16 @@ mi_decl_export char* mi_realpath(const char* fname, char* resolved_name) mi_attr #define MI_SMALL_WSIZE_MAX (128) #define MI_SMALL_SIZE_MAX (MI_SMALL_WSIZE_MAX*sizeof(void*)) -mi_decl_export mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export mi_decl_allocator void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); -mi_decl_export mi_decl_allocator void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); -mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_reallocn(void* 
p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; -mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; // ------------------------------------------------------ @@ -145,14 +155,14 @@ mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_ // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. // ------------------------------------------------------------------------------------- -mi_decl_export mi_decl_allocator void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); -mi_decl_export mi_decl_allocator void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export mi_decl_allocator void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); -mi_decl_export mi_decl_allocator void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export mi_decl_allocator void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3); -mi_decl_export mi_decl_allocator void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); -mi_decl_export mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsize, 
size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); -mi_decl_export mi_decl_allocator void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); // ------------------------------------------------------------------------------------- @@ -161,7 +171,7 @@ mi_decl_export mi_decl_allocator void* mi_realloc_aligned_at(void* p, size_t new struct mi_heap_s; 
typedef struct mi_heap_s mi_heap_t; -mi_decl_export mi_heap_t* mi_heap_new(void); +mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new(void); mi_decl_export void mi_heap_delete(mi_heap_t* heap); mi_decl_export void mi_heap_destroy(mi_heap_t* heap); mi_decl_export mi_heap_t* mi_heap_set_default(mi_heap_t* heap); @@ -169,28 +179,28 @@ mi_decl_export mi_heap_t* mi_heap_get_default(void); mi_decl_export mi_heap_t* mi_heap_get_backing(void); mi_decl_export void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept; -mi_decl_export mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); -mi_decl_export mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); -mi_decl_export mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* 
mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); -mi_decl_export mi_decl_allocator void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept; -mi_decl_export mi_decl_allocator void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); -mi_decl_export char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept; -mi_decl_export char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept; -mi_decl_export char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept; -mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); -mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) 
mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); -mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3) mi_attr_alloc_align(4); -mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); -mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3) mi_attr_alloc_align(4); -mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) 
mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); // -------------------------------------------------------------------------------- @@ -240,8 +250,8 @@ typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg); // Experimental -mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; -mi_decl_export bool mi_is_redirected() mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export bool mi_is_redirected() mi_attr_noexcept; mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; @@ -297,13 +307,13 @@ typedef enum mi_option_e { } mi_option_t; -mi_decl_export bool mi_option_is_enabled(mi_option_t option); +mi_decl_nodiscard mi_decl_export bool 
mi_option_is_enabled(mi_option_t option); mi_decl_export void mi_option_enable(mi_option_t option); mi_decl_export void mi_option_disable(mi_option_t option); mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable); mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable); -mi_decl_export long mi_option_get(mi_option_t option); +mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); mi_decl_export void mi_option_set(mi_option_t option, long value); mi_decl_export void mi_option_set_default(mi_option_t option, long value); @@ -313,24 +323,24 @@ mi_decl_export void mi_option_set_default(mi_option_t option, long value); // (This can be convenient when providing overrides of these functions as done in `mimalloc-override.h`.) // ------------------------------------------------------------------------------------------------------- -mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept; -mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept; mi_decl_export void mi_cfree(void* p) mi_attr_noexcept; mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept; mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; -mi_decl_export void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); -mi_decl_export void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); +mi_decl_nodiscard mi_decl_export void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export 
void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); -mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); +mi_decl_nodiscard mi_decl_export void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); +mi_decl_nodiscard mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); -mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept; -mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept; -mi_decl_export unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept; -mi_decl_export unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept; mi_decl_export int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept; mi_decl_export int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept; @@ -339,14 +349,14 @@ mi_decl_export void mi_free_size_aligned(void* p, 
size_t size, size_t alignment) mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; // The `mi_new` wrappers implement C++ semantics on out-of-memory instead of directly returning `NULL`. -// (and call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception). -mi_decl_export void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); -mi_decl_export void* mi_new_nothrow(size_t size) mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); -mi_decl_export void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1, 2); -mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); +// (and call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception).mi_decl_nodiscard ami_decl_export void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export void* mi_new_nothrow(size_t size) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1, 2); +mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_malloc mi_attr_alloc_size(2); 
+mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); #ifdef __cplusplus } @@ -358,7 +368,7 @@ mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_a // --------------------------------------------------------------------------------------------- #ifdef __cplusplus -#include // std::numeric_limits +#include // PTRDIFF_MAX #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 #include // std::true_type #include // std::forward @@ -381,10 +391,10 @@ template struct mi_stl_allocator { void deallocate(T* p, size_type) { mi_free(p); } #if (__cplusplus >= 201703L) // C++17 - T* allocate(size_type count) { return static_cast(mi_new_n(count, sizeof(T))); } - T* allocate(size_type count, const void*) { return allocate(count); } + mi_decl_nodiscard T* allocate(size_type count) { return static_cast(mi_new_n(count, sizeof(T))); } + mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); } #else - pointer allocate(size_type count, const void* = 0) { return static_cast(mi_new_n(count, sizeof(value_type))); } + mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast(mi_new_n(count, sizeof(value_type))); } #endif #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 @@ -399,7 +409,7 @@ template struct mi_stl_allocator { void destroy(pointer p) { p->~value_type(); } #endif - size_type max_size() const mi_attr_noexcept { return (std::numeric_limits::max() / sizeof(value_type)); } + size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); } pointer address(reference x) const { return &x; } const_pointer address(const_reference x) const { return &x; } }; diff --git a/src/options.c b/src/options.c index 7559a4b5..72a753e1 100644 --- a/src/options.c +++ b/src/options.c @@ -85,7 +85,7 @@ void _mi_options_init(void) { mi_add_stderr_output(); // now it safe to use stderr for 
output for(int i = 0; i < _mi_option_last; i++ ) { mi_option_t option = (mi_option_t)i; - mi_option_get(option); // initialize + long l = mi_option_get(option); UNUSED(l); // initialize if (option != mi_option_verbose) { mi_option_desc_t* desc = &options[option]; _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); diff --git a/test/main-override-static.c b/test/main-override-static.c index 839a5d2f..950392d0 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -24,7 +24,7 @@ int main() { void* p1 = malloc(78); void* p2 = malloc(24); free(p1); - p1 = malloc(8); + p1 = mi_malloc(8); //char* s = strdup("hello\n"); free(p2); p2 = malloc(16); From 8aba40a9728fa50f2d541c8712257ff7cc264b18 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 1 Feb 2020 17:41:04 -0800 Subject: [PATCH 258/293] use default declaration for the STL allocator class --- ide/vs2019/mimalloc.vcxproj | 2 ++ include/mimalloc.h | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index fad6de5d..e18db0c5 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -151,6 +151,7 @@ Default CompileAsCpp true + Default true @@ -178,6 +179,7 @@ Default CompileAsCpp true + Default true diff --git a/include/mimalloc.h b/include/mimalloc.h index f94d9ee7..caf71726 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -25,11 +25,11 @@ terms of the MIT license. 
A copy of the license can be found in the file #endif #if (__cplusplus >= 201703) -#define mi_decl_nodiscard [[nodiscard]] -#elif (__GNUC__ >= 4) // includes clang and icc -#define mi_decl_nodiscard __attribute__((warn_unused_result)) +#define mi_decl_nodiscard [[nodiscard]] +#elif (__GNUC__ >= 4) +#define mi_decl_nodiscard __attribute__((warn_unused_result)) #elif (_MSC_VER >= 1700) -#define mi_decl_nodiscard _Check_return_ +#define mi_decl_nodiscard _Check_return_ #else #define mi_decl_nodiscard #endif @@ -325,10 +325,10 @@ mi_decl_export void mi_option_set_default(mi_option_t option, long value); mi_decl_nodiscard mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept; -mi_decl_export void mi_cfree(void* p) mi_attr_noexcept; -mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept; +mi_decl_export void mi_cfree(void* p) mi_attr_noexcept; +mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept; -mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; +mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); mi_decl_nodiscard mi_decl_export void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); @@ -341,8 +341,8 @@ mi_decl_nodiscard mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_ mi_decl_nodiscard mi_decl_export unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept; -mi_decl_export int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept; -mi_decl_export int mi_wdupenv_s(unsigned short** buf, size_t* size, const 
unsigned short* name) mi_attr_noexcept; +mi_decl_export int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept; +mi_decl_export int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept; mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept; mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept; @@ -384,8 +384,8 @@ template struct mi_stl_allocator { typedef value_type const* const_pointer; template struct rebind { typedef mi_stl_allocator other; }; - mi_stl_allocator() mi_attr_noexcept { } - mi_stl_allocator(const mi_stl_allocator&) mi_attr_noexcept { } + mi_stl_allocator() mi_attr_noexcept = default; + mi_stl_allocator(const mi_stl_allocator&) mi_attr_noexcept = default; template mi_stl_allocator(const mi_stl_allocator&) mi_attr_noexcept { } mi_stl_allocator select_on_container_copy_construction() const { return *this; } void deallocate(T* p, size_type) { mi_free(p); } From bf2eb55ed12ace317fba24c74786c7e8da1253c6 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 1 Feb 2020 17:48:26 -0800 Subject: [PATCH 259/293] reformatting --- include/mimalloc.h | 78 ++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index caf71726..346774b7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -25,50 +25,50 @@ terms of the MIT license. 
A copy of the license can be found in the file #endif #if (__cplusplus >= 201703) -#define mi_decl_nodiscard [[nodiscard]] + #define mi_decl_nodiscard [[nodiscard]] #elif (__GNUC__ >= 4) -#define mi_decl_nodiscard __attribute__((warn_unused_result)) + #define mi_decl_nodiscard __attribute__((warn_unused_result)) #elif (_MSC_VER >= 1700) -#define mi_decl_nodiscard _Check_return_ + #define mi_decl_nodiscard _Check_return_ #else -#define mi_decl_nodiscard + #define mi_decl_nodiscard #endif #ifdef _MSC_VER #if !defined(MI_SHARED_LIB) #define mi_decl_export #elif defined(MI_SHARED_LIB_EXPORT) - #define mi_decl_export __declspec(dllexport) + #define mi_decl_export __declspec(dllexport) #else - #define mi_decl_export __declspec(dllimport) + #define mi_decl_export __declspec(dllimport) #endif #if (_MSC_VER >= 1900) && !defined(__EDG__) - #define mi_decl_allocator __declspec(allocator) __declspec(restrict) + #define mi_decl_allocator __declspec(allocator) __declspec(restrict) #else - #define mi_decl_allocator __declspec(restrict) + #define mi_decl_allocator __declspec(restrict) #endif - #define mi_cdecl __cdecl + #define mi_cdecl __cdecl #define mi_attr_malloc #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) #define mi_attr_alloc_align(p) -#elif defined(__GNUC__) // includes clang and icc - #define mi_cdecl // leads to warnings... __attribute__((cdecl)) - #define mi_decl_export __attribute__((visibility("default"))) +#elif defined(__GNUC__) // includes clang and icc + #define mi_cdecl // leads to warnings... 
__attribute__((cdecl)) + #define mi_decl_export __attribute__((visibility("default"))) #define mi_decl_allocator - #define mi_attr_malloc __attribute__((malloc)) + #define mi_attr_malloc __attribute__((malloc)) #if (defined(__clang_major__) && (__clang_major__ < 4)) || (__GNUC__ < 5) - #define mi_attr_alloc_size(s) - #define mi_attr_alloc_size2(s1,s2) - #define mi_attr_alloc_align(p) + #define mi_attr_alloc_size(s) + #define mi_attr_alloc_size2(s1,s2) + #define mi_attr_alloc_align(p) #elif defined(__INTEL_COMPILER) - #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) - #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) - #define mi_attr_alloc_align(p) + #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) + #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) + #define mi_attr_alloc_align(p) #else - #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) - #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) - #define mi_attr_alloc_align(p) __attribute__((alloc_align(p))) + #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) + #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) + #define mi_attr_alloc_align(p) __attribute__((alloc_align(p))) #endif #else #define mi_cdecl @@ -100,7 +100,7 @@ mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_calloc(size_t count, mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); mi_decl_export mi_decl_allocator void* mi_expand(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export void mi_free(void* p) mi_attr_noexcept; +mi_decl_export void mi_free(void* p) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export char* mi_strdup(const char* s) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export char* mi_strndup(const char* s, size_t n) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export char* 
mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept; @@ -168,6 +168,7 @@ mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_realloc_aligned_at(v // ------------------------------------------------------------------------------------- // Heaps: first-class, but can only allocate from the same thread that created it. // ------------------------------------------------------------------------------------- + struct mi_heap_s; typedef struct mi_heap_s mi_heap_t; @@ -259,16 +260,17 @@ mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size // deprecated mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; + // ------------------------------------------------------ // Convenience // ------------------------------------------------------ -#define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp))) -#define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp))) -#define mi_calloc_tp(tp,n) ((tp*)mi_calloc(n,sizeof(tp))) -#define mi_mallocn_tp(tp,n) ((tp*)mi_mallocn(n,sizeof(tp))) -#define mi_reallocn_tp(p,tp,n) ((tp*)mi_reallocn(p,n,sizeof(tp))) -#define mi_recalloc_tp(p,tp,n) ((tp*)mi_recalloc(p,n,sizeof(tp))) +#define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp))) +#define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp))) +#define mi_calloc_tp(tp,n) ((tp*)mi_calloc(n,sizeof(tp))) +#define mi_mallocn_tp(tp,n) ((tp*)mi_mallocn(n,sizeof(tp))) +#define mi_reallocn_tp(p,tp,n) ((tp*)mi_reallocn(p,n,sizeof(tp))) +#define mi_recalloc_tp(p,tp,n) ((tp*)mi_recalloc(p,n,sizeof(tp))) #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp))) #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp))) @@ -307,15 +309,15 @@ typedef enum mi_option_e { } mi_option_t; -mi_decl_nodiscard mi_decl_export bool mi_option_is_enabled(mi_option_t option); -mi_decl_export void mi_option_enable(mi_option_t option); -mi_decl_export void mi_option_disable(mi_option_t option); 
-mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable); -mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable); +mi_decl_nodiscard mi_decl_export bool mi_option_is_enabled(mi_option_t option); +mi_decl_export void mi_option_enable(mi_option_t option); +mi_decl_export void mi_option_disable(mi_option_t option); +mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable); +mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable); -mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); -mi_decl_export void mi_option_set(mi_option_t option, long value); -mi_decl_export void mi_option_set_default(mi_option_t option, long value); +mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); +mi_decl_export void mi_option_set(mi_option_t option, long value); +mi_decl_export void mi_option_set_default(mi_option_t option, long value); // ------------------------------------------------------------------------------------------------------- @@ -349,7 +351,7 @@ mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; // The `mi_new` wrappers implement C++ semantics on out-of-memory instead of directly returning `NULL`. -// (and call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception).mi_decl_nodiscard ami_decl_export void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); +// (and call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception). 
mi_decl_nodiscard mi_decl_export void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); mi_decl_nodiscard mi_decl_export void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_nodiscard mi_decl_export void* mi_new_nothrow(size_t size) mi_attr_malloc mi_attr_alloc_size(1); From 4a5f3592c064e00ea1378732ab91b6bd2ebcaf04 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 09:25:39 -0800 Subject: [PATCH 260/293] fix build warning on FreeBSD --- src/os.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/os.c b/src/os.c index 6e8c12d8..8427a1b5 100644 --- a/src/os.c +++ b/src/os.c @@ -285,6 +285,7 @@ static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int pr } #else UNUSED(try_alignment); + UNUSED(mi_os_get_aligned_hint); #endif if (p==NULL) { p = mmap(addr,size,protect_flags,flags,fd,0); From d2db9f1fc26e9545bcacfb35376ccda473adf803 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 13:12:22 -0800 Subject: [PATCH 261/293] update thread local storage --- include/mimalloc-internal.h | 133 +++++++++++++++++++++--------------- src/init.c | 20 ++++-- 2 files changed, 95 insertions(+), 58 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 0e3ebed8..0669048e 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -10,18 +10,6 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-types.h" -#if defined(MI_MALLOC_OVERRIDE) -#if defined(__APPLE__) && (defined(__i386__) || defined(__x86_64__)) -#define MI_TLS_OSX_FAST -#define MI_TLS_OSX_SLOT 94 // seems unused, except in Webkit? See: -#elif defined(__APPLE__) -#include -#define MI_TLS_PTHREADS -#elif (defined(__OpenBSD__) || defined(__DragonFly__)) -#define MI_TLS_RECURSE_GUARD -#endif -#endif - #if (MI_DEBUG>0) #define mi_trace_message(...) 
_mi_trace_message(__VA_ARGS__) #else @@ -284,47 +272,53 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot ----------------------------------------------------------- */ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap -extern mi_heap_t _mi_heap_main; // statically allocated main backing heap extern bool _mi_process_is_initialized; +mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap -#if defined(MI_TLS_OSX_FAST) -#define MI_TLS_OSX_OFFSET (MI_TLS_OSX_SLOT*sizeof(void*)) -static inline void* mi_tls_osx_fast_get(void) { - void* ret; - __asm__("mov %%gs:%1, %0" : "=r" (ret) : "m" (*(void**)(MI_TLS_OSX_OFFSET))); - return ret; +#if defined(MI_MALLOC_OVERRIDE) +// On some systems, MacOSX, OpenBSD, and DragonFly, accessing a thread local variable leads to recursion +// as the access invokes malloc. We avoid this by stealing a TLS slot from the OS internal slots so no +// allocation is involved. On OSX we use the direct TLS slots, while on the BSD's we use space in the `pthread_t` structure. +#if defined(__MACH__) // OSX +#define MI_TLS_SLOT 89 // seems unused? (__PTK_FRAMEWORK_OLDGC_KEY9) see + // possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) +#elif defined(__OpenBSD__) +#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 1*sizeof(void*)) // offset `retval` +#elif defined(__DragonFly__) +#define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) 
+#endif +#endif + +#if defined(MI_TLS_SLOT) +static inline void* mi_tls_slot(size_t slot); // forward declaration +#elif defined(MI_TLS_PTHREAD_SLOT_OFS) +static inline mi_heap_t** mi_tls_pthread_heap_slot(void) { + pthread_t self = pthread_self(); + return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); } -static inline void mi_tls_osx_fast_set(void* value) { - __asm__("movq %1,%%gs:%0" : "=m" (*(void**)(MI_TLS_OSX_OFFSET)) : "rn" (value)); -} -#elif defined(MI_TLS_PTHREADS) -extern pthread_key_t _mi_heap_default_key; +#elif defined(MI_TLS_PTHREAD) +extern pthread_key_t _mi_heap_default_key; #else extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from #endif static inline mi_heap_t* mi_get_default_heap(void) { -#if defined(MI_TLS_OSX_FAST) - // Use a fixed slot in the TSD on MacOSX to avoid recursion (since the loader calls malloc). - // We use slot 94 (__PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4) - // which seems unused except for the more recent Webkit - // Use with care. - mi_heap_t* heap = (mi_heap_t*)mi_tls_osx_fast_get(); +#if defined(MI_TLS_SLOT) + // Use steal a fixed slot in the TLS on MacOSX to avoid recursion (since the loader calls malloc). + mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); -#elif defined(MI_TLS_PTHREADS) - // Use pthreads for TLS; this is used on macOSX with interpose as the loader calls `malloc` - // to allocate TLS storage leading to recursive calls if __thread declared variables are accessed. - // Using pthreads allows us to initialize without recursive calls. (performance seems still quite good). - mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? (mi_heap_t*)&_mi_heap_empty : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); +#elif defined(MI_TLS_PTHREAD_SLOT_OFS) + mi_heap_t* heap = mi_tls_pthread_heap_slot(); + return (mi_unlikely(heap == NULL) ? 
(mi_heap_t*)&_mi_heap_empty : heap); +#elif defined(MI_TLS_PTHREAD) + mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); #else #if defined(MI_TLS_RECURSE_GUARD) - // On some BSD platforms, like openBSD, the dynamic loader calls `malloc` - // to initialize thread local data (before our module is loaded). // To avoid recursion, we need to avoid accessing the thread local `_mi_default_heap` // until our module is loaded and use the statically allocated main heap until that time. // TODO: patch ourselves dynamically to avoid this check every time? - if (mi_unlikely(!_mi_process_is_initialized)) return &_mi_heap_main; + if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); #endif return _mi_heap_default; #endif @@ -344,6 +338,7 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) { } static inline uintptr_t _mi_ptr_cookie(const void* p) { + extern mi_heap_t _mi_heap_main; mi_assert_internal(_mi_heap_main.cookie != 0); return ((uintptr_t)p ^ _mi_heap_main.cookie); } @@ -669,24 +664,54 @@ static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { // Windows: works on Intel and ARM in both 32- and 64-bit return (uintptr_t)NtCurrentTeb(); } -#elif (defined(__GNUC__) || defined(__clang__)) && \ + +#elif defined(__GNUC__) && \ (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)) -// TLS register on x86 is in the FS or GS register -// see: https://akkadia.org/drepper/tls.pdf + +// TLS register on x86 is in the FS or GS register, see: https://akkadia.org/drepper/tls.pdf +static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { + void* res; + const size_t ofs = (slot*sizeof(void*)); +#if defined(__i386__) + __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // 32-bit always uses GS +#elif defined(__MACH__) && 
defined(__x86_64__) + __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS +#elif defined(__x86_64__) + __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS +#elif defined(__arm__) + void** tcb; UNUSED(ofs); + asm volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + res = tcb[slot]; +#elif defined(__aarch64__) + void** tcb; UNUSED(ofs); + asm volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + res = tcb[slot]; +#endif + return res; +} + +static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { + const size_t ofs = (slot*sizeof(void*)); +#if defined(__i386__) + __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS +#elif defined(__MACH__) && defined(__x86_64__) + __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOSX uses GS +#elif defined(__x86_64__) + __asm__("movq %1,%%fs:%1" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS +#elif defined(__arm__) + void** tcb; UNUSED(ofs); + asm volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + tcb[slot] = value; +#elif defined(__aarch64__) + void** tcb; UNUSED(ofs); + asm volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + tcb[slot] = value; +#endif +} + static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { - uintptr_t tid; - #if defined(__i386__) - __asm__("movl %%gs:0, %0" : "=r" (tid) : : ); // 32-bit always uses GS - #elif defined(__MACH__) - __asm__("movq %%gs:0, %0" : "=r" (tid) : : ); // x86_64 macOS uses GS - #elif defined(__x86_64__) - __asm__("movq %%fs:0, %0" : "=r" (tid) : : ); // x86_64 Linux, BSD uses FS - #elif defined(__arm__) - asm volatile ("mrc p15, 0, %0, c13, c0, 3" : "=r" (tid)); - #elif defined(__aarch64__) - asm volatile ("mrs %0, tpidr_el0" : "=r" (tid)); - #endif - return tid; + // normally, slot 0 is the pointer to the thread control block + return 
(uintptr_t)mi_tls_slot(0); } #else // otherwise use standard C diff --git a/src/init.c b/src/init.c index 960cccf1..f59daa9e 100644 --- a/src/init.c +++ b/src/init.c @@ -107,6 +107,8 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) #define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) +extern mi_heap_t _mi_heap_main; + static mi_tld_t tld_main = { 0, false, &_mi_heap_main, @@ -146,6 +148,11 @@ static void mi_heap_main_init(void) { } } +mi_heap_t* _mi_heap_main_get(void) { + mi_heap_main_init(); + return &_mi_heap_main; +} + /* ----------------------------------------------------------- Initialization and freeing of the thread local heaps @@ -333,9 +340,11 @@ static void _mi_thread_done(mi_heap_t* heap) { void _mi_heap_set_default_direct(mi_heap_t* heap) { mi_assert_internal(heap != NULL); - #if defined(MI_TLS_OSX_FAST) - mi_tls_osx_fast_set(heap); - #elif defined(MI_TLS_PTHREADS) + #if defined(MI_TLS_SLOT) + mi_tls_slot_set(MI_TLS_SLOT,heap); + #elif defined(MI_TLS_PTHREAD_SLOT_OFS) + *mi_tls_pthread_heap_slot() = heap; + #elif defined(MI_TLS_PTHREAD) // we use _mi_heap_default_key #else _mi_heap_default = heap; @@ -406,13 +415,16 @@ static void mi_allocator_done() { // Called once by the process loader static void mi_process_load(void) { + mi_heap_main_init(); + #if defined(MI_TLS_RECURSE_GUARD) volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; UNUSED(dummy); + #endif os_preloading = false; atexit(&mi_process_done); _mi_options_init(); mi_process_init(); - //mi_stats_reset(); + //mi_stats_reset();- if (mi_redirected) _mi_verbose_message("malloc is redirected.\n"); // show message from the redirector (if present) From 8bc20631e47b8c0ec79efb5f2452e958bffb4558 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 13:25:26 -0800 Subject: [PATCH 262/293] fixes 
for freeBSD --- include/mimalloc-internal.h | 4 +++- src/init.c | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 0669048e..cfbdc9ca 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -292,11 +292,13 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea #if defined(MI_TLS_SLOT) static inline void* mi_tls_slot(size_t slot); // forward declaration #elif defined(MI_TLS_PTHREAD_SLOT_OFS) +#include <pthread.h> static inline mi_heap_t** mi_tls_pthread_heap_slot(void) { pthread_t self = pthread_self(); return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); } #elif defined(MI_TLS_PTHREAD) +#include <pthread.h> extern pthread_key_t _mi_heap_default_key; #else extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from @@ -308,7 +310,7 @@ static inline mi_heap_t* mi_get_default_heap(void) { mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); #elif defined(MI_TLS_PTHREAD_SLOT_OFS) - mi_heap_t* heap = mi_tls_pthread_heap_slot(); + mi_heap_t* heap = *mi_tls_pthread_heap_slot(); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); #elif defined(MI_TLS_PTHREAD) mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ?
_mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); diff --git a/src/init.c b/src/init.c index f59daa9e..b7f329cb 100644 --- a/src/init.c +++ b/src/init.c @@ -168,7 +168,7 @@ typedef struct mi_thread_data_s { static bool _mi_heap_init(void) { if (mi_heap_is_initialized(mi_get_default_heap())) return true; if (_mi_is_main_thread()) { - mi_assert_internal(_mi_heap_main.thread_id != 0); + // mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization // the main heap is statically allocated mi_heap_main_init(); _mi_heap_set_default_direct(&_mi_heap_main); @@ -358,8 +358,9 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { mi_assert_internal(mi_fls_key != 0); FlsSetValue(mi_fls_key, heap); #elif defined(MI_USE_PTHREADS) - mi_assert_internal(_mi_heap_default_key != (pthread_key_t)(-1)); + if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD pthread_setspecific(_mi_heap_default_key, heap); + } #endif } From 07fbe4f80f04a417bb19ac83113f73e1d1db3393 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 14:31:28 -0800 Subject: [PATCH 263/293] fixes for dragonfly --- include/mimalloc-internal.h | 7 +++++++ src/options.c | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cfbdc9ca..b11cb5fe 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -285,6 +285,7 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea #elif defined(__OpenBSD__) #define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 1*sizeof(void*)) // offset `retval` #elif defined(__DragonFly__) +#warning "mimalloc is not working correctly on DragonFly yet." #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) 
#endif #endif @@ -295,6 +296,12 @@ static inline void* mi_tls_slot(size_t slot); // forward declaration #include static inline mi_heap_t** mi_tls_pthread_heap_slot(void) { pthread_t self = pthread_self(); + #if defined(__DragonFly__) + if (self==NULL) { + static mi_heap_t* pheap_main = _mi_heap_main_get(); + return &pheap_main; + } + #endif return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); } #elif defined(MI_TLS_PTHREAD) diff --git a/src/options.c b/src/options.c index ec58c31c..0af4a485 100644 --- a/src/options.c +++ b/src/options.c @@ -70,7 +70,11 @@ static mi_option_desc_t options[_mi_option_last] = { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) +#if defined(__NetBSD__) + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed +#else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed +#endif { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose From 865965b8c0c83674018be95b9bfdd65a4d2f7d2e Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 15:43:13 -0800 Subject: [PATCH 264/293] fix warnings under clang-cl --- src/alloc-posix.c | 1 - src/os.c | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/alloc-posix.c b/src/alloc-posix.c index 505e42e4..234c39a3 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -9,7 +9,6 @@ terms of the MIT license. 
A copy of the license can be found in the file // mi prefixed publi definitions of various Posix, Unix, and C++ functions // for convenience and used when overriding these functions. // ------------------------------------------------------------------------ - #include "mimalloc.h" #include "mimalloc-internal.h" diff --git a/src/os.c b/src/os.c index 8427a1b5..aa49400d 100644 --- a/src/os.c +++ b/src/os.c @@ -192,7 +192,7 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats if (was_committed) _mi_stat_decrease(&stats->committed, size); _mi_stat_decrease(&stats->reserved, size); if (err) { -#pragma warning(suppress:4996) + #pragma warning(suppress:4996) _mi_warning_message("munmap failed: %s, addr 0x%8li, size %lu\n", strerror(errno), (size_t)addr, size); return false; } @@ -215,9 +215,9 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) // on modern Windows try use VirtualAlloc2 for aligned allocation if (try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { - MEM_ADDRESS_REQUIREMENTS reqs = { 0 }; + MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 }; reqs.Alignment = try_alignment; - MEM_EXTENDED_PARAMETER param = { 0 }; + MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} }; param.Type = MemExtendedParameterAddressRequirements; param.Pointer = &reqs; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, &param, 1); @@ -828,7 +828,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) mi_win_enable_large_os_pages(); #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; + MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available)
{ From f5ab2c1c49bfd153db341e68dcb86fe045bec445 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 15:50:02 -0800 Subject: [PATCH 265/293] suppress spurious warnings with clang-cl --- ide/vs2017/mimalloc-override.vcxproj | 8 ++++---- ide/vs2017/mimalloc-test-stress.vcxproj | 4 ++-- ide/vs2017/mimalloc.vcxproj | 20 ++++++++++++++++---- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 4225a2f9..f828ba97 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -95,7 +95,7 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + _CRT_SECURE_NO_WARNINGS;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false Default @@ -123,7 +123,7 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + _CRT_SECURE_NO_WARNINGS;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false Default @@ -152,7 +152,7 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG + _CRT_SECURE_NO_WARNINGS;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) false @@ -184,7 +184,7 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG + _CRT_SECURE_NO_WARNINGS;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) false diff --git a/ide/vs2017/mimalloc-test-stress.vcxproj b/ide/vs2017/mimalloc-test-stress.vcxproj index 325ba3ff..b8267d0b 100644 --- a/ide/vs2017/mimalloc-test-stress.vcxproj +++ b/ide/vs2017/mimalloc-test-stress.vcxproj @@ -149,8 +149,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea7} + 
+ {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index e08deec4..fa236d64 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -90,6 +90,18 @@ .lib mimalloc-static + + false + + + false + + + false + + + false + Level3 @@ -97,7 +109,7 @@ true true ../../include - MI_DEBUG=3;%(PreprocessorDefinitions); + _CRT_SECURE_NO_WARNINGS;MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp false stdcpp17 @@ -116,7 +128,7 @@ true true ../../include - MI_DEBUG=3;%(PreprocessorDefinitions); + _CRT_SECURE_NO_WARNINGS;MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp false stdcpp17 @@ -143,7 +155,7 @@ true true ../../include - %(PreprocessorDefinitions);NDEBUG + _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) false @@ -170,7 +182,7 @@ true true ../../include - %(PreprocessorDefinitions);NDEBUG + _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) false From 12c4108abe44ac5e084e9d12ee4dba8c7718ba24 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 16:09:09 -0800 Subject: [PATCH 266/293] update comments --- include/mimalloc-internal.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index b11cb5fe..75aea2e2 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -267,18 +267,25 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot } -/* ----------------------------------------------------------- - The thread local default heap ------------------------------------------------------------ */ +/* ---------------------------------------------------------------------------------------- +The thread local default heap: `_mi_get_default_heap` return the thread local heap. 
+On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a +__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures +that the storage will always be available (allocated on the thread stacks). +On some platforms though we cannot use that when overriding `malloc` since the underlying +TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. +We try to circumvent this in an efficient way: +- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the + loader itself calls `malloc` even before the modules are initialized. +- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS). +- DragonFly: not yet working. +------------------------------------------------------------------------------------------- */ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap extern bool _mi_process_is_initialized; mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap #if defined(MI_MALLOC_OVERRIDE) -// On some systems, MacOSX, OpenBSD, and DragonFly, accessing a thread local variable leads to recursion -// as the access invokes malloc. We avoid this by stealing a TLS slot from the OS internal slots so no -// allocation is involved. On OSX we use the direct TLS slots, while on the BSD's we use space in the `pthread_t` structure. #if defined(__MACH__) // OSX #define MI_TLS_SLOT 89 // seems unused? (__PTK_FRAMEWORK_OLDGC_KEY9) see // possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) @@ -313,7 +320,6 @@ extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate static inline mi_heap_t* mi_get_default_heap(void) { #if defined(MI_TLS_SLOT) - // Use steal a fixed slot in the TLS on MacOSX to avoid recursion (since the loader calls malloc). 
mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); #elif defined(MI_TLS_PTHREAD_SLOT_OFS) @@ -323,10 +329,7 @@ static inline mi_heap_t* mi_get_default_heap(void) { mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); #else - #if defined(MI_TLS_RECURSE_GUARD) - // To avoid recursion, we need to avoid accessing the thread local `_mi_default_heap` - // until our module is loaded and use the statically allocated main heap until that time. - // TODO: patch ourselves dynamically to avoid this check every time? + #if defined(MI_TLS_RECURSE_GUARD) if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); #endif return _mi_heap_default; @@ -662,9 +665,8 @@ static inline size_t _mi_os_numa_node_count(void) { // ------------------------------------------------------------------- -// Getting the thread id should be performant -// as it is called in the fast path of `_mi_free`, -// so we specialize for various platforms. +// Getting the thread id should be performant as it is called in the +// fast path of `_mi_free` and we specialize for various platforms. 
// ------------------------------------------------------------------- #if defined(_WIN32) #define WIN32_LEAN_AND_MEAN @@ -699,6 +701,7 @@ static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { return res; } +// setting is only used on macOSX for now static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { const size_t ofs = (slot*sizeof(void*)); #if defined(__i386__) @@ -719,7 +722,7 @@ static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { } static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { - // normally, slot 0 is the pointer to the thread control block + // in all our targets, slot 0 is the pointer to the thread control block return (uintptr_t)mi_tls_slot(0); } #else From f0dc6e7e42e7e7a45d62ba96da014c5f8e568a10 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 16:21:06 -0800 Subject: [PATCH 267/293] add extra alignment test --- src/alloc-aligned.c | 2 +- test/test-api.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 05dd5fc6..c4c29ee8 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -19,9 +19,9 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t // the address at offset is aligned regardless of the allocated size. 
mi_assert(alignment > 0 && alignment % sizeof(void*) == 0); - if (alignment <= MI_MAX_ALIGN_SIZE && offset==0) return _mi_heap_malloc_zero(heap, size, zero); if (mi_unlikely(size > PTRDIFF_MAX)) return NULL; // we don't allocate more than PTRDIFF_MAX (see ) if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) return NULL; // require power-of-two (see ) + if (alignment <= MI_MAX_ALIGN_SIZE && offset==0) return _mi_heap_malloc_zero(heap, size, zero); const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` // try if there is a small block available with just the right alignment diff --git a/test/test-api.c b/test/test-api.c index 95891754..2d26e14d 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -140,6 +140,13 @@ int main() { CHECK_BODY("malloc-aligned2", { void* p = mi_malloc_aligned(48,32); result = (p != NULL && (uintptr_t)(p) % 32 == 0); mi_free(p); }); + CHECK_BODY("malloc-aligned3", { + void* p1 = mi_malloc_aligned(48,32); bool result1 = (p1 != NULL && (uintptr_t)(p1) % 32 == 0); + void* p2 = mi_malloc_aligned(48,32); bool result2 = (p2 != NULL && (uintptr_t)(p2) % 32 == 0); + mi_free(p2); + mi_free(p1); + result = (result1&&result2); + }); CHECK_BODY("malloc-aligned-at1", { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); }); From 757dcc84115eeccb93ff23e177851c6d0d88f8ea Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 19:07:26 -0800 Subject: [PATCH 268/293] extend interpose for macOSX --- include/mimalloc-internal.h | 24 ++++++++++++------------ src/alloc-override.c | 17 +++++++++-------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 75aea2e2..37722cd9 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -269,11 +269,11 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot /* 
---------------------------------------------------------------------------------------- The thread local default heap: `_mi_get_default_heap` return the thread local heap. -On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a +On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a __thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures -that the storage will always be available (allocated on the thread stacks). -On some platforms though we cannot use that when overriding `malloc` since the underlying -TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. +that the storage will always be available (allocated on the thread stacks). +On some platforms though we cannot use that when overriding `malloc` since the underlying +TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. We try to circumvent this in an efficient way: - macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the loader itself calls `malloc` even before the modules are initialized. @@ -285,11 +285,11 @@ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value o extern bool _mi_process_is_initialized; mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap -#if defined(MI_MALLOC_OVERRIDE) +#if defined(MI_MALLOC_OVERRIDE) #if defined(__MACH__) // OSX -#define MI_TLS_SLOT 89 // seems unused? (__PTK_FRAMEWORK_OLDGC_KEY9) see +#define MI_TLS_SLOT 84 // seems unused? (__PTK_FRAMEWORK_OLDGC_KEY9) see // possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) -#elif defined(__OpenBSD__) +#elif defined(__OpenBSD__) #define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 1*sizeof(void*)) // offset `retval` #elif defined(__DragonFly__) #warning "mimalloc is not working correctly on DragonFly yet." 
@@ -299,7 +299,7 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea #if defined(MI_TLS_SLOT) static inline void* mi_tls_slot(size_t slot); // forward declaration -#elif defined(MI_TLS_PTHREAD_SLOT_OFS) +#elif defined(MI_TLS_PTHREAD_SLOT_OFS) #include static inline mi_heap_t** mi_tls_pthread_heap_slot(void) { pthread_t self = pthread_self(); @@ -308,7 +308,7 @@ static inline mi_heap_t** mi_tls_pthread_heap_slot(void) { static mi_heap_t* pheap_main = _mi_heap_main_get(); return &pheap_main; } - #endif + #endif return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); } #elif defined(MI_TLS_PTHREAD) @@ -319,7 +319,7 @@ extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate #endif static inline mi_heap_t* mi_get_default_heap(void) { -#if defined(MI_TLS_SLOT) +#if defined(MI_TLS_SLOT) mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); #elif defined(MI_TLS_PTHREAD_SLOT_OFS) @@ -329,7 +329,7 @@ static inline mi_heap_t* mi_get_default_heap(void) { mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); #else - #if defined(MI_TLS_RECURSE_GUARD) + #if defined(MI_TLS_RECURSE_GUARD) if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); #endif return _mi_heap_default; @@ -665,7 +665,7 @@ static inline size_t _mi_os_numa_node_count(void) { // ------------------------------------------------------------------- -// Getting the thread id should be performant as it is called in the +// Getting the thread id should be performant as it is called in the // fast path of `_mi_free` and we specialize for various platforms. 
// ------------------------------------------------------------------- #if defined(_WIN32) diff --git a/src/alloc-override.c b/src/alloc-override.c index 58996c5f..c0fdf161 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -41,26 +41,27 @@ terms of the MIT license. A copy of the license can be found in the file #endif #if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_INTERPOSE) - static void mi_free_tls_safe(void* p) { - if (mi_unlikely(_mi_preloading())) return; - mi_free(p); - } // use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1` // See: struct mi_interpose_s { const void* replacement; const void* target; }; - #define MI_INTERPOSEX(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } - #define MI_INTERPOSE_MI(fun) MI_INTERPOSEX(fun,mi_##fun) + #define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } + #define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) __attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) = { MI_INTERPOSE_MI(malloc), MI_INTERPOSE_MI(calloc), MI_INTERPOSE_MI(realloc), - MI_INTERPOSEX(free,mi_free_tls_safe), MI_INTERPOSE_MI(strdup), - MI_INTERPOSE_MI(strndup) + MI_INTERPOSE_MI(strndup), + MI_INTERPOSE_MI(realpath), + MI_INTERPOSE_MI(posix_memalign), + MI_INTERPOSE_MI(reallocf), + MI_INTERPOSE_MI(valloc), + // some code allocates from a zone but deallocates using plain free :-( (like NxHashResizeToCapacity ) + MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us }; #elif defined(_MSC_VER) // cannot override malloc unless using a dll. 
From f3c47c7c91801c712db08d6944503132defef039 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 21:03:09 -0800 Subject: [PATCH 269/293] improved malloc zone handling on macOSX (not working yet) --- include/mimalloc-internal.h | 2 +- src/alloc-override-osx.c | 24 +++++++++++++++++++++++- src/alloc-override.c | 14 +++++++------- src/alloc.c | 16 ++++++++-------- src/init.c | 8 ++++---- test/test-stress.c | 2 +- 6 files changed, 44 insertions(+), 22 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 37722cd9..4ac7da78 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -298,7 +298,7 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea #endif #if defined(MI_TLS_SLOT) -static inline void* mi_tls_slot(size_t slot); // forward declaration +static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept; // forward declaration #elif defined(MI_TLS_PTHREAD_SLOT_OFS) #include static inline mi_heap_t** mi_tls_pthread_heap_slot(void) { diff --git a/src/alloc-override-osx.c b/src/alloc-override-osx.c index d4f8b06d..ed0bc2de 100644 --- a/src/alloc-override-osx.c +++ b/src/alloc-override-osx.c @@ -14,6 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file #error "this file should only be included on macOS" #endif +#warning "malloc zones do not seem to work for now; use MI_INTERPOSE instead" /* ------------------------------------------------------ Override system malloc on macOS This is done through the malloc zone interface. 
@@ -35,34 +36,42 @@ extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_im ------------------------------------------------------ */ static size_t zone_size(malloc_zone_t* zone, const void* p) { + UNUSED(zone); UNUSED(p); return 0; // as we cannot guarantee that `p` comes from us, just return 0 } static void* zone_malloc(malloc_zone_t* zone, size_t size) { + UNUSED(zone); return mi_malloc(size); } static void* zone_calloc(malloc_zone_t* zone, size_t count, size_t size) { + UNUSED(zone); return mi_calloc(count, size); } static void* zone_valloc(malloc_zone_t* zone, size_t size) { + UNUSED(zone); return mi_malloc_aligned(size, _mi_os_page_size()); } static void zone_free(malloc_zone_t* zone, void* p) { + UNUSED(zone); return mi_free(p); } static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) { + UNUSED(zone); return mi_realloc(p, newsize); } static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) { + UNUSED(zone); return mi_malloc_aligned(size,alignment); } static void zone_destroy(malloc_zone_t* zone) { + UNUSED(zone); // todo: ignore for now? 
} @@ -83,11 +92,13 @@ static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) { } static size_t zone_pressure_relief(malloc_zone_t* zone, size_t size) { + UNUSED(zone); UNUSED(size); mi_collect(false); return 0; } static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) { + UNUSED(size); zone_free(zone,p); } @@ -102,34 +113,43 @@ static kern_return_t intro_enumerator(task_t task, void* p, vm_range_recorder_t recorder) { // todo: enumerate all memory + UNUSED(task); UNUSED(p); UNUSED(type_mask); UNUSED(zone_address); + UNUSED(reader); UNUSED(recorder); return KERN_SUCCESS; } static size_t intro_good_size(malloc_zone_t* zone, size_t size) { + UNUSED(zone); return mi_good_size(size); } static boolean_t intro_check(malloc_zone_t* zone) { + UNUSED(zone); return true; } static void intro_print(malloc_zone_t* zone, boolean_t verbose) { + UNUSED(zone); UNUSED(verbose); mi_stats_print(NULL); } static void intro_log(malloc_zone_t* zone, void* p) { + UNUSED(zone); UNUSED(p); // todo? } static void intro_force_lock(malloc_zone_t* zone) { + UNUSED(zone); // todo? } static void intro_force_unlock(malloc_zone_t* zone) { + UNUSED(zone); // todo? } static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) { + UNUSED(zone); // todo... 
stats->blocks_in_use = 0; stats->size_in_use = 0; @@ -138,6 +158,7 @@ static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) { } static boolean_t intro_zone_locked(malloc_zone_t* zone) { + UNUSED(zone); return false; } @@ -161,7 +182,6 @@ static malloc_zone_t* mi_get_default_zone() } } - static void __attribute__((constructor)) _mi_macos_override_malloc() { static malloc_introspection_t intro; @@ -201,6 +221,7 @@ static void __attribute__((constructor)) _mi_macos_override_malloc() zone.free_definite_size = &zone_free_definite_size; zone.pressure_relief = &zone_pressure_relief; intro.zone_locked = &intro_zone_locked; + intro.statistics = &intro_statistics; // force the purgeable zone to exist to avoid strange bugs if (malloc_default_purgeable_zone) { @@ -225,6 +246,7 @@ static void __attribute__((constructor)) _mi_macos_override_malloc() malloc_zone_unregister(purgeable_zone); malloc_zone_register(purgeable_zone); } + } #endif // MI_MALLOC_OVERRIDE diff --git a/src/alloc-override.c b/src/alloc-override.c index c0fdf161..151c2333 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -13,7 +13,7 @@ terms of the MIT license. A copy of the license can be found in the file #error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)" #endif -#if defined(MI_MALLOC_OVERRIDE) && !defined(_WIN32) +#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32) || (defined(__MACH__) && !defined(MI_INTERPOSE))) // ------------------------------------------------------ // Override system malloc @@ -68,10 +68,10 @@ terms of the MIT license. A copy of the license can be found in the file // we just override new/delete which does work in a static library. 
#else // On all other systems forward to our API - void* malloc(size_t size) mi_attr_noexcept MI_FORWARD1(mi_malloc, size); - void* calloc(size_t size, size_t n) mi_attr_noexcept MI_FORWARD2(mi_calloc, size, n); - void* realloc(void* p, size_t newsize) mi_attr_noexcept MI_FORWARD2(mi_realloc, p, newsize); - void free(void* p) mi_attr_noexcept MI_FORWARD0(mi_free, p); + void* malloc(size_t size) MI_FORWARD1(mi_malloc, size); + void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n); + void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize); + void free(void* p) MI_FORWARD0(mi_free, p); #endif #if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__) @@ -99,8 +99,8 @@ terms of the MIT license. A copy of the license can be found in the file void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); } #if (__cplusplus >= 201402L || _MSC_VER >= 1916) - void operator delete (void* p, std::size_t n) MI_FORWARD02(mi_free_size,p,n); - void operator delete[](void* p, std::size_t n) MI_FORWARD02(mi_free_size,p,n); + void operator delete (void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n); + void operator delete[](void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n); #endif #if (__cplusplus > 201402L || defined(__cpp_aligned_new)) && (!defined(__GNUC__) || (__GNUC__ > 5)) diff --git a/src/alloc.c b/src/alloc.c index 61f34353..d2fbe4b1 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -212,7 +212,7 @@ static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* bl size_t delta; bool ok = mi_page_decode_padding(page, block, &delta, &bsize); mi_assert_internal(ok); mi_assert_internal(delta <= bsize); - return (ok ? bsize - delta : 0); + return (ok ? 
bsize - delta : 0); } static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { @@ -259,7 +259,7 @@ static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, co mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); padding->delta = (uint32_t)new_delta; } -#else +#else static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { UNUSED(page); UNUSED(block); @@ -359,7 +359,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block } else if (mi_unlikely(mi_page_is_in_full(page))) { _mi_page_unfull(page); - } + } } else { _mi_free_block_mt(page,block); @@ -401,7 +401,7 @@ void mi_free(void* p) mi_attr_noexcept "(this may still be a valid very large allocation (over 64MiB))\n", p); if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) { _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); - } + } } #endif #if (MI_DEBUG!=0 || MI_SECURE>=4) @@ -421,11 +421,11 @@ void mi_free(void* p) mi_attr_noexcept mi_heap_stat_decrease(heap, malloc, bsize); if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { // huge page stats are accounted for in `_mi_page_retire` mi_heap_stat_decrease(heap, normal[_mi_bin(bsize)], 1); - } + } #endif if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks - // local, and not full or aligned + // local, and not full or aligned if (mi_unlikely(mi_check_is_double_free(page,block))) return; mi_check_padding(page, block); #if (MI_DEBUG!=0) @@ -436,7 +436,7 @@ void mi_free(void* p) mi_attr_noexcept page->used--; if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); - } + } } else { // non-local, aligned blocks, or a full page; use the more generic path @@ -473,7 +473,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept { const mi_segment_t* const segment = _mi_ptr_segment(p); const mi_page_t* const 
page = _mi_segment_page_of(segment, p); const mi_block_t* const block = (const mi_block_t*)p; - const size_t size = mi_page_usable_size_of(page, block); + const size_t size = mi_page_usable_size_of(page, block); if (mi_unlikely(mi_page_has_aligned(page))) { ptrdiff_t const adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); diff --git a/src/init.c b/src/init.c index b7f329cb..2f5ca224 100644 --- a/src/init.c +++ b/src/init.c @@ -34,7 +34,7 @@ const mi_page_t _mi_page_empty = { #if defined(MI_PADDING) && (MI_INTPTR_SIZE >= 8) #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } -#elif defined(MI_PADDING) +#elif defined(MI_PADDING) #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } #else #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() } @@ -190,7 +190,7 @@ static bool _mi_heap_init(void) { heap->cookie = _mi_heap_random_next(heap) | 1; heap->keys[0] = _mi_heap_random_next(heap); heap->keys[1] = _mi_heap_random_next(heap); - heap->tld = tld; + heap->tld = tld; tld->heap_backing = heap; tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; @@ -421,9 +421,9 @@ static void mi_process_load(void) { volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; UNUSED(dummy); #endif - os_preloading = false; + os_preloading = false; atexit(&mi_process_done); - _mi_options_init(); + _mi_options_init(); mi_process_init(); //mi_stats_reset();- if (mi_redirected) _mi_verbose_message("malloc is redirected.\n"); diff --git a/test/test-stress.c b/test/test-stress.c index 7d8993a0..f1c8b2e1 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -38,7 +38,7 @@ static bool allow_large_objects = true; // allow very large objects? static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? 
-#ifdef USE_STD_MALLOC +#ifndef USE_STD_MALLOC #define custom_calloc(n,s) calloc(n,s) #define custom_realloc(p,s) realloc(p,s) #define custom_free(p) free(p) From feb0699bcb3f81cb14964ff8e3d92788241b1cd0 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 22:01:04 -0800 Subject: [PATCH 270/293] fix aligment check when padding is enabled --- ide/vs2019/mimalloc-override.vcxproj.filters | 8 ++++---- src/alloc-aligned.c | 9 +++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index 83d6f7fe..8e36f50e 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -1,9 +1,6 @@  - - Header Files - Source Files @@ -49,6 +46,9 @@ Source Files + + Source Files + @@ -78,4 +78,4 @@ {39cb7e38-69d0-43fb-8406-6a0f7cefc3b4} - + \ No newline at end of file diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index c4c29ee8..40362068 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -25,15 +25,16 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` // try if there is a small block available with just the right alignment - if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { - mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE); + const size_t padsize = size + MI_PADDING_SIZE; + if (mi_likely(padsize <= MI_SMALL_SIZE_MAX)) { + mi_page_t* page = _mi_heap_get_free_small_page(heap,padsize); const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; if (mi_likely(page->free != NULL && is_aligned)) { #if MI_STAT>1 mi_heap_stat_increase( heap, malloc, size); #endif - void* p = _mi_page_malloc(heap,page,size + MI_PADDING_SIZE); // TODO: inline _mi_page_malloc + void* p = _mi_page_malloc(heap,page,padsize); // TODO: inline _mi_page_malloc mi_assert_internal(p != 
NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); if (zero) _mi_block_zero_init(page,p,size); @@ -42,7 +43,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t } // use regular allocation if it is guaranteed to fit the alignment constraints - if (offset==0 && alignment<=size && size<=MI_MEDIUM_OBJ_SIZE_MAX && (size&align_mask)==0) { + if (offset==0 && alignment<=padsize && padsize<=MI_MEDIUM_OBJ_SIZE_MAX && (padsize&align_mask)==0) { void* p = _mi_heap_malloc_zero(heap, size, zero); mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); return p; From 1c2e0a47cada2cd689f34db18b28ca41a53cc1f6 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 22:04:53 -0800 Subject: [PATCH 271/293] fix noexcept attribute on array delete operators --- include/mimalloc-new-delete.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-new-delete.h b/include/mimalloc-new-delete.h index 050f9433..fded0c04 100644 --- a/include/mimalloc-new-delete.h +++ b/include/mimalloc-new-delete.h @@ -32,8 +32,8 @@ terms of the MIT license. 
A copy of the license can be found in the file void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } #if (__cplusplus >= 201402L || _MSC_VER >= 1916) - void operator delete (void* p, std::size_t n) { mi_free_size(p,n); }; - void operator delete[](void* p, std::size_t n) { mi_free_size(p,n); }; + void operator delete (void* p, std::size_t n) noexcept { mi_free_size(p,n); }; + void operator delete[](void* p, std::size_t n) noexcept { mi_free_size(p,n); }; #endif #if (__cplusplus > 201402L || defined(__cpp_aligned_new)) From b241be7075c32bd3952f4d9f7eb22c6531b8397e Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 22:08:33 -0800 Subject: [PATCH 272/293] reenable mimalloc in the stress test --- test/test-stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-stress.c b/test/test-stress.c index f1c8b2e1..7d8993a0 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -38,7 +38,7 @@ static bool allow_large_objects = true; // allow very large objects? static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? 
-#ifndef USE_STD_MALLOC +#ifdef USE_STD_MALLOC #define custom_calloc(n,s) calloc(n,s) #define custom_realloc(p,s) realloc(p,s) #define custom_free(p) free(p) From 3560e0a867a82b6a593a01ac4995c11498f0a167 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 22:15:09 -0800 Subject: [PATCH 273/293] fix TLS slot number on OSX --- include/mimalloc-internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 4ac7da78..b2297c50 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -268,7 +268,7 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot /* ---------------------------------------------------------------------------------------- -The thread local default heap: `_mi_get_default_heap` return the thread local heap. +The thread local default heap: `_mi_get_default_heap` returns the thread local heap. On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a __thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures that the storage will always be available (allocated on the thread stacks). @@ -287,7 +287,7 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea #if defined(MI_MALLOC_OVERRIDE) #if defined(__MACH__) // OSX -#define MI_TLS_SLOT 84 // seems unused? (__PTK_FRAMEWORK_OLDGC_KEY9) see +#define MI_TLS_SLOT 89 // seems unused? 
(__PTK_FRAMEWORK_OLDGC_KEY9) see // possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) #elif defined(__OpenBSD__) #define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 1*sizeof(void*)) // offset `retval` From a96e94f940db7d844030239bfbedd004d5915657 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 2 Feb 2020 22:46:38 -0800 Subject: [PATCH 274/293] change TLS slot on OpenBSD --- include/mimalloc-internal.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index b2297c50..cea6b9c3 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -287,10 +287,13 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea #if defined(MI_MALLOC_OVERRIDE) #if defined(__MACH__) // OSX -#define MI_TLS_SLOT 89 // seems unused? (__PTK_FRAMEWORK_OLDGC_KEY9) see - // possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) +#define MI_TLS_SLOT 89 // seems unused? +// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) +// see #elif defined(__OpenBSD__) -#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 1*sizeof(void*)) // offset `retval` +// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) +// see +#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) #elif defined(__DragonFly__) #warning "mimalloc is not working correctly on DragonFly yet." #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) 
From e67606210326c838b8fa3004a83721df4d3c6dbe Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 5 Feb 2020 17:40:13 -0800 Subject: [PATCH 275/293] update mac zone code --- src/alloc-override-osx.c | 67 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/src/alloc-override-osx.c b/src/alloc-override-osx.c index ed0bc2de..99c6a134 100644 --- a/src/alloc-override-osx.c +++ b/src/alloc-override-osx.c @@ -182,6 +182,72 @@ static malloc_zone_t* mi_get_default_zone() } } +// directly overwrite the default zone as per: +// + +static void __attribute__((constructor)) _mi_macos_override_malloc_direct() +{ + static malloc_introspection_t intro; + memset(&intro, 0, sizeof(intro)); + + intro.enumerator = &intro_enumerator; + intro.good_size = &intro_good_size; + intro.check = &intro_check; + intro.print = &intro_print; + intro.log = &intro_log; + intro.force_lock = &intro_force_lock; + intro.force_unlock = &intro_force_unlock; + + static malloc_zone_t oldzone; + static malloc_zone_t* zone = malloc_default_zone(); // get the `malloc` backing default zone + if (zone == NULL) return; + + // save the default zone in oldzone + memset(&oldzone, 0, sizeof(oldzone)); + if (zone->version >= 9) memcpy(&oldzone, zone, sizeof(oldzone)); + + // overwrite default zone functions in-place + zone->zone_name = "mimalloc"; + zone->size = &zone_size; + zone->introspect = &intro; + zone->malloc = &zone_malloc; + zone->calloc = &zone_calloc; + zone->valloc = &zone_valloc; + zone->free = &zone_free; + zone->realloc = &zone_realloc; + zone->destroy = &zone_destroy; + zone->batch_malloc = &zone_batch_malloc; + zone->batch_free = &zone_batch_free; + + malloc_zone_t* purgeable_zone = NULL; + +#if defined(MAC_OS_X_VERSION_10_6) && \ + MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 + // switch to version 9 on OSX 10.6 to support memalign. 
+ // zone->version = 9; + zone->memalign = &zone_memalign; + zone->free_definite_size = &zone_free_definite_size; + zone->pressure_relief = &zone_pressure_relief; + intro.zone_locked = &intro_zone_locked; + intro.statistics = &intro_statistics; + /* + // force the purgeable zone to exist to avoid strange bugs + if (malloc_default_purgeable_zone) { + purgeable_zone = malloc_default_purgeable_zone(); + } + */ +#endif + /* + // Unregister, and re-register the purgeable_zone to avoid bugs if it occurs + // earlier than the default zone. + if (purgeable_zone != NULL) { + malloc_zone_unregister(purgeable_zone); + malloc_zone_register(purgeable_zone); + } + */ +} + +/* static void __attribute__((constructor)) _mi_macos_override_malloc() { static malloc_introspection_t intro; @@ -248,5 +314,6 @@ static void __attribute__((constructor)) _mi_macos_override_malloc() } } +*/ #endif // MI_MALLOC_OVERRIDE From 9062f397649da3b4851d9107cc5a2b01021faff5 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 8 Feb 2020 20:08:52 -0800 Subject: [PATCH 276/293] enable interpose separate from zones on macOS --- CMakeLists.txt | 16 +++++++++++----- src/alloc-override-osx.c | 20 ++++++++++++++++---- src/alloc-override.c | 2 +- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2da7974b..e16830aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,11 +5,12 @@ set(CMAKE_C_STANDARD 11) set(CMAKE_CXX_STANDARD 17) option(MI_OVERRIDE "Override the standard malloc interface" ON) -option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" ON) option(MI_DEBUG_FULL "Use full internal heap invariant checking in DEBUG mode" OFF) option(MI_SECURE "Use full security mitigations (like guard pages, allocation randomization, double-free mitigation, and free-list corruption detection)" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF) option(MI_SEE_ASM "Generate assembly files" OFF) +option(MI_INTERPOSE "Use interpose to 
override standard malloc on macOS" ON) +option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" OFF) # enables interpose as well option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF) @@ -61,14 +62,19 @@ endif() if(MI_OVERRIDE MATCHES "ON") message(STATUS "Override standard malloc (MI_OVERRIDE=ON)") if(APPLE) + if(MI_OSX_ZONE MATCHES "ON") + # use zone's on macOS + message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)") + list(APPEND mi_sources src/alloc-override-osx.c) + if(NOT MI_INTERPOSE MATCHES "ON") + message(STATUS " (enabling INTERPOSE as well since zone's require this)") + set(MI_INTERPOSE "ON") + endif() + endif() if(MI_INTERPOSE MATCHES "ON") # use interpose on macOS message(STATUS " Use interpose to override malloc (MI_INTERPOSE=ON)") list(APPEND mi_defines MI_INTERPOSE) - else() - # use zone's on macOS - message(STATUS " Use zone's to override malloc (MI_INTERPOSE=OFF)") - list(APPEND mi_sources src/alloc-override-osx.c) endif() endif() endif() diff --git a/src/alloc-override-osx.c b/src/alloc-override-osx.c index 99c6a134..92d5ce2b 100644 --- a/src/alloc-override-osx.c +++ b/src/alloc-override-osx.c @@ -14,7 +14,6 @@ terms of the MIT license. A copy of the license can be found in the file #error "this file should only be included on macOS" #endif -#warning "malloc zones do not seem to work for now; use MI_INTERPOSE instead" /* ------------------------------------------------------ Override system malloc on macOS This is done through the malloc zone interface. 
@@ -182,8 +181,10 @@ static malloc_zone_t* mi_get_default_zone() } } +#if 0 // directly overwrite the default zone as per: // +#include static void __attribute__((constructor)) _mi_macos_override_malloc_direct() { @@ -199,13 +200,18 @@ static void __attribute__((constructor)) _mi_macos_override_malloc_direct() intro.force_unlock = &intro_force_unlock; static malloc_zone_t oldzone; - static malloc_zone_t* zone = malloc_default_zone(); // get the `malloc` backing default zone + static malloc_zone_t* zone; + zone = mi_get_default_zone(); // get the `malloc` backing default zone if (zone == NULL) return; // save the default zone in oldzone memset(&oldzone, 0, sizeof(oldzone)); if (zone->version >= 9) memcpy(&oldzone, zone, sizeof(oldzone)); + if (zone->version >= 8) { + vm_protect(mach_task_self(), (uintptr_t)zone, sizeof(*zone), 0, + VM_PROT_READ|VM_PROT_WRITE); + } // overwrite default zone functions in-place zone->zone_name = "mimalloc"; zone->size = &zone_size; @@ -237,6 +243,11 @@ static void __attribute__((constructor)) _mi_macos_override_malloc_direct() } */ #endif + if (zone->version >= 8) { + vm_protect(mach_task_self(), (uintptr_t)zone, sizeof(*zone), 0, + VM_PROT_READ); + } + /* // Unregister, and re-register the purgeable_zone to avoid bugs if it occurs // earlier than the default zone. @@ -247,7 +258,8 @@ static void __attribute__((constructor)) _mi_macos_override_malloc_direct() */ } -/* +#else + static void __attribute__((constructor)) _mi_macos_override_malloc() { static malloc_introspection_t intro; @@ -314,6 +326,6 @@ static void __attribute__((constructor)) _mi_macos_override_malloc() } } -*/ +#endif #endif // MI_MALLOC_OVERRIDE diff --git a/src/alloc-override.c b/src/alloc-override.c index 151c2333..c0e7bc2b 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -13,7 +13,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)" #endif -#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32) || (defined(__MACH__) && !defined(MI_INTERPOSE))) +#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32)) // || (defined(__MACH__) && !defined(MI_INTERPOSE))) // ------------------------------------------------------ // Override system malloc From afe434463ac92bc140691c55c3922a53f4324bfb Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 9 Feb 2020 18:26:50 -0800 Subject: [PATCH 277/293] add comments on overriding in macOSX --- src/alloc-override-osx.c | 86 +++------------------------------------- 1 file changed, 6 insertions(+), 80 deletions(-) diff --git a/src/alloc-override-osx.c b/src/alloc-override-osx.c index 92d5ce2b..cc03f5e2 100644 --- a/src/alloc-override-osx.c +++ b/src/alloc-override-osx.c @@ -17,6 +17,12 @@ terms of the MIT license. A copy of the license can be found in the file /* ------------------------------------------------------ Override system malloc on macOS This is done through the malloc zone interface. + It seems we also need to interpose (see `alloc-override.c`) + or otherwise we get zone errors as there are usually + already allocations done by the time we take over the + zone. Unfortunately, that means we need to replace + the `free` with a checked free (`cfree`) impacting + performance. 
------------------------------------------------------ */ #include @@ -181,85 +187,6 @@ static malloc_zone_t* mi_get_default_zone() } } -#if 0 -// directly overwrite the default zone as per: -// -#include - -static void __attribute__((constructor)) _mi_macos_override_malloc_direct() -{ - static malloc_introspection_t intro; - memset(&intro, 0, sizeof(intro)); - - intro.enumerator = &intro_enumerator; - intro.good_size = &intro_good_size; - intro.check = &intro_check; - intro.print = &intro_print; - intro.log = &intro_log; - intro.force_lock = &intro_force_lock; - intro.force_unlock = &intro_force_unlock; - - static malloc_zone_t oldzone; - static malloc_zone_t* zone; - zone = mi_get_default_zone(); // get the `malloc` backing default zone - if (zone == NULL) return; - - // save the default zone in oldzone - memset(&oldzone, 0, sizeof(oldzone)); - if (zone->version >= 9) memcpy(&oldzone, zone, sizeof(oldzone)); - - if (zone->version >= 8) { - vm_protect(mach_task_self(), (uintptr_t)zone, sizeof(*zone), 0, - VM_PROT_READ|VM_PROT_WRITE); - } - // overwrite default zone functions in-place - zone->zone_name = "mimalloc"; - zone->size = &zone_size; - zone->introspect = &intro; - zone->malloc = &zone_malloc; - zone->calloc = &zone_calloc; - zone->valloc = &zone_valloc; - zone->free = &zone_free; - zone->realloc = &zone_realloc; - zone->destroy = &zone_destroy; - zone->batch_malloc = &zone_batch_malloc; - zone->batch_free = &zone_batch_free; - - malloc_zone_t* purgeable_zone = NULL; - -#if defined(MAC_OS_X_VERSION_10_6) && \ - MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 - // switch to version 9 on OSX 10.6 to support memalign. 
- // zone->version = 9; - zone->memalign = &zone_memalign; - zone->free_definite_size = &zone_free_definite_size; - zone->pressure_relief = &zone_pressure_relief; - intro.zone_locked = &intro_zone_locked; - intro.statistics = &intro_statistics; - /* - // force the purgeable zone to exist to avoid strange bugs - if (malloc_default_purgeable_zone) { - purgeable_zone = malloc_default_purgeable_zone(); - } - */ -#endif - if (zone->version >= 8) { - vm_protect(mach_task_self(), (uintptr_t)zone, sizeof(*zone), 0, - VM_PROT_READ); - } - - /* - // Unregister, and re-register the purgeable_zone to avoid bugs if it occurs - // earlier than the default zone. - if (purgeable_zone != NULL) { - malloc_zone_unregister(purgeable_zone); - malloc_zone_register(purgeable_zone); - } - */ -} - -#else - static void __attribute__((constructor)) _mi_macos_override_malloc() { static malloc_introspection_t intro; @@ -326,6 +253,5 @@ static void __attribute__((constructor)) _mi_macos_override_malloc() } } -#endif #endif // MI_MALLOC_OVERRIDE From 5ba87e56c94a83db919be33fe5449bebc39e9d3e Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 9 Feb 2020 18:32:09 -0800 Subject: [PATCH 278/293] update readme for 1.5 release --- readme.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/readme.md b/readme.md index baac2a93..8d3d22e0 100644 --- a/readme.md +++ b/readme.md @@ -10,7 +10,7 @@ mimalloc (pronounced "me-malloc") is a general purpose allocator with excellent [performance](#performance) characteristics. Initially developed by Daan Leijen for the run-time systems of the -[Koka](https://github.com/koka-lang/koka) and [Lean](https://github.com/leanprover/lean) languages. +[Koka](https://github.com/koka-lang/koka) and [Lean](https://github.com/leanprover/lean) languages. Latest release:`v1.4.0` (2020-01-22). 
It is a drop-in replacement for `malloc` and can be used in other programs @@ -47,7 +47,7 @@ It also has an easy way to override the allocator in [Windows](#override_on_wind - __fast__: In our benchmarks (see [below](#performance)), _mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), and usually uses less memory (up to 25% more in the worst case). A nice property - is that it does consistently well over a wide range of benchmarks. There is also good huge OS page + is that it does consistently well over a wide range of benchmarks. There is also good huge OS page support for larger server programs. The [documentation](https://microsoft.github.io/mimalloc) gives a full overview of the API. @@ -57,7 +57,8 @@ Enjoy! ### Releases -* 2020-01-22, `v1.4.0`: stable release 1.4: improved performance for delayed OS page reset, +* 2020-02-09, `v1.5.0`: stable release 1.5: improved free performance, small bug fixes. +* 2020-01-22, `v1.4.0`: stable release 1.4: improved performance for delayed OS page reset, more eager concurrent free, addition of STL allocator, fixed potential memory leak. * 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode. @@ -212,13 +213,13 @@ or via environment variables. - `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at startup and can give quite a (latency) performance improvement on long running workloads. Usually it is better to not use `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. 
Just like large OS pages, use with care as reserving - contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at + contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at startup only once). Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). With huge OS pages, it may be beneficial to set the setting `MIMALLOC_EAGER_COMMIT_DELAY=N` (with usually `N` as 1) to delay the initial `N` segments From 04f1c3b1e23677ce03bd16137e73089abd552175 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 9 Feb 2020 18:53:39 -0800 Subject: [PATCH 279/293] bump version to v1.6.0 --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- readme.md | 6 ++++++ test/CMakeLists.txt | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 5137be80..6454d91f 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,5 +1,5 @@ set(mi_version_major 1) -set(mi_version_minor 5) +set(mi_version_minor 6) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 346774b7..552a8b2b 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 150 // major + 2 digits minor +#define MI_MALLOC_VERSION 160 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes diff --git a/readme.md b/readme.md index 8d3d22e0..56f0430c 100644 --- a/readme.md +++ b/readme.md @@ -57,6 +57,12 @@ Enjoy! 
### Releases +* 2020-02-09, `v1.6.0`: stable release 1.6: fixed potential memory leak, improved overriding + and thread local support on FreeBSD, NetBSD, DragonFly, and macOSX. New byte-precise + heap block overflow detection in debug mode (besides the double-free detection and free-list + corruption detection). Add `nodiscard` attribute to most allocation functions. + Enable `MIMALLOC_PAGE_RESET` by default. New reclamation strategy for abandoned heap pages + for better memory footprint. * 2020-02-09, `v1.5.0`: stable release 1.5: improved free performance, small bug fixes. * 2020-01-22, `v1.4.0`: stable release 1.4: improved performance for delayed OS page reset, more eager concurrent free, addition of STL allocator, fixed potential memory leak. diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ce077d14..4152f99d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 1.5 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +find_package(mimalloc 1.6 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") # overriding with a dynamic library From 0a77b7423f5beb4fb88def78cae84cdb368f0c8c Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 9 Feb 2020 19:12:19 -0800 Subject: [PATCH 280/293] Update readme.md --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 56f0430c..e4e96ba7 100644 --- a/readme.md +++ b/readme.md @@ -11,7 +11,7 @@ mimalloc (pronounced "me-malloc") is a general purpose allocator with excellent [performance](#performance) characteristics. Initially developed by Daan Leijen for the run-time systems of the [Koka](https://github.com/koka-lang/koka) and [Lean](https://github.com/leanprover/lean) languages. -Latest release:`v1.4.0` (2020-01-22). +Latest release:`v1.6.0` (2020-02-09). 
It is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: From 9749c83ca0aa5b540a3cb4e901e471aa64423255 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 13 Feb 2020 09:16:41 -0800 Subject: [PATCH 281/293] fix build with debug and secure both enabled, issue #203 --- src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 23a04a84..6aaef428 100644 --- a/src/page.c +++ b/src/page.c @@ -105,7 +105,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); #if MI_SECURE - mi_assert_internal(page->key != 0); + mi_assert_internal(page->keys[0] != 0); #endif if (mi_page_heap(page)!=NULL) { mi_segment_t* segment = _mi_page_segment(page); From f42b8526d0767ae6605f43a198fe984b3d19aa5e Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 13 Feb 2020 10:36:39 -0800 Subject: [PATCH 282/293] fix wrong __declspec(restrict) and __attribute__((malloc)) attributes on reallocation functions --- include/mimalloc.h | 155 ++++++++++++++++++++--------------------- src/alloc-aligned.c | 48 ++++++------- src/alloc-posix.c | 12 ++-- src/alloc.c | 66 +++++++++--------- test/main-override.cpp | 36 +++++++--- 5 files changed, 168 insertions(+), 149 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 552a8b2b..d1120e9f 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -43,9 +43,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #define mi_decl_export __declspec(dllimport) #endif #if (_MSC_VER >= 1900) && !defined(__EDG__) - #define mi_decl_allocator __declspec(allocator) __declspec(restrict) + #define mi_decl_restrict __declspec(allocator) __declspec(restrict) #else - #define mi_decl_allocator __declspec(restrict) + #define mi_decl_restrict __declspec(restrict) #endif #define mi_cdecl __cdecl #define mi_attr_malloc @@ -55,7 +55,7 @@ terms of the MIT license. A copy of the license can be found in the file #elif defined(__GNUC__) // includes clang and icc #define mi_cdecl // leads to warnings... __attribute__((cdecl)) #define mi_decl_export __attribute__((visibility("default"))) - #define mi_decl_allocator + #define mi_decl_restrict #define mi_attr_malloc __attribute__((malloc)) #if (defined(__clang_major__) && (__clang_major__ < 4)) || (__GNUC__ < 5) #define mi_attr_alloc_size(s) @@ -73,7 +73,7 @@ terms of the MIT license. A copy of the license can be found in the file #else #define mi_cdecl #define mi_decl_export - #define mi_decl_allocator + #define mi_decl_restrict #define mi_attr_malloc #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) @@ -95,15 +95,15 @@ extern "C" { // Standard malloc interface // ------------------------------------------------------ -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_expand(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept mi_attr_malloc 
mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_nodiscard mi_decl_export void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); +mi_decl_export void* mi_expand(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); mi_decl_export void mi_free(void* p) mi_attr_noexcept; -mi_decl_nodiscard mi_decl_export char* mi_strdup(const char* s) mi_attr_noexcept; -mi_decl_nodiscard mi_decl_export char* mi_strndup(const char* s, size_t n) mi_attr_noexcept; -mi_decl_nodiscard mi_decl_export char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc; // ------------------------------------------------------ // Extended functionality @@ -111,13 +111,13 @@ mi_decl_nodiscard mi_decl_export char* mi_realpath(const char* fname, char* reso #define MI_SMALL_WSIZE_MAX (128) #define MI_SMALL_SIZE_MAX (MI_SMALL_WSIZE_MAX*sizeof(void*)) -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export 
mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_nodiscard mi_decl_export void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3); +mi_decl_nodiscard mi_decl_export void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; @@ -155,14 +155,14 @@ mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_ // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. 
// ------------------------------------------------------------------------------------- -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard 
mi_decl_export mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2); // ------------------------------------------------------------------------------------- @@ -180,28 +180,28 @@ mi_decl_export mi_heap_t* mi_heap_get_default(void); mi_decl_export mi_heap_t* mi_heap_get_backing(void); mi_decl_export void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept; -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept 
mi_attr_malloc mi_attr_alloc_size2(2, 3); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept; -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4);; +mi_decl_nodiscard mi_decl_export void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); -mi_decl_nodiscard mi_decl_export char* 
mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept; -mi_decl_nodiscard mi_decl_export char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept; -mi_decl_nodiscard mi_decl_export char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc; -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3) mi_attr_alloc_align(4); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc 
mi_attr_alloc_size2(2, 3); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3) mi_attr_alloc_align(4); -mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, 
size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3); // -------------------------------------------------------------------------------- @@ -211,21 +211,21 @@ mi_decl_nodiscard mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned // see // -------------------------------------------------------------------------------- -mi_decl_export mi_decl_allocator void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_recalloc(void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); +mi_decl_nodiscard mi_decl_export void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export void* mi_recalloc(void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3); -mi_decl_export mi_decl_allocator void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); -mi_decl_export mi_decl_allocator void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3) mi_attr_alloc_align(4); -mi_decl_export mi_decl_allocator void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); +mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2); +mi_decl_nodiscard 
mi_decl_export void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(2,3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(2,3); -mi_decl_export mi_decl_allocator void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); -mi_decl_export mi_decl_allocator void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4); +mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4); -mi_decl_export mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3) mi_attr_alloc_align(4); -mi_decl_export mi_decl_allocator void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); -mi_decl_export mi_decl_allocator void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4) mi_attr_alloc_align(5); -mi_decl_export mi_decl_allocator void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4); +mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4); 
+mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(3,4) mi_attr_alloc_align(5); +mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(3,4); // ------------------------------------------------------ @@ -233,7 +233,6 @@ mi_decl_export mi_decl_allocator void* mi_heap_recalloc_aligned_at(mi_heap_t* he // ------------------------------------------------------ mi_decl_export bool mi_heap_contains_block(mi_heap_t* heap, const void* p); - mi_decl_export bool mi_heap_check_owned(mi_heap_t* heap, const void* p); mi_decl_export bool mi_check_owned(const void* p); @@ -323,42 +322,42 @@ mi_decl_export void mi_option_set_default(mi_option_t option, long value); // ------------------------------------------------------------------------------------------------------- // "mi" prefixed implementations of various posix, Unix, Windows, and C++ allocation functions. // (This can be convenient when providing overrides of these functions as done in `mimalloc-override.h`.) +// note: we use `mi_cfree` as "checked free" and it checks if the pointer is in our heap before free-ing. 
// ------------------------------------------------------------------------------------------------------- -mi_decl_nodiscard mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept; -mi_decl_nodiscard mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept; mi_decl_export void mi_cfree(void* p) mi_attr_noexcept; mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept; -mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; -mi_decl_nodiscard mi_decl_export void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); -mi_decl_nodiscard mi_decl_export void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); - -mi_decl_nodiscard mi_decl_export void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_nodiscard mi_decl_export void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); -mi_decl_nodiscard mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); +mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard 
mi_decl_export mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); +mi_decl_nodiscard mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3); mi_decl_nodiscard mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept; -mi_decl_nodiscard mi_decl_export unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept; -mi_decl_nodiscard mi_decl_export unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept; -mi_decl_export int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept mi_attr_malloc; +mi_decl_export int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept; mi_decl_export int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept; -mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept; +mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept; mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept; -mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; +mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; // The `mi_new` wrappers implement C++ semantics on out-of-memory instead of directly returning `NULL`. // (and call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception). 
-mi_decl_nodiscard mi_decl_export void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_nodiscard mi_decl_export void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); -mi_decl_nodiscard mi_decl_export void* mi_new_nothrow(size_t size) mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_nodiscard mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); -mi_decl_nodiscard mi_decl_export void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1, 2); -mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1, 2); +mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3); #ifdef __cplusplus } diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 40362068..85408868 100644 --- a/src/alloc-aligned.c +++ 
b/src/alloc-aligned.c @@ -64,53 +64,53 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t } -mi_decl_allocator void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false); } -mi_decl_allocator void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_malloc_aligned_at(heap, size, alignment, 0); } -mi_decl_allocator void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, true); } -mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_zalloc_aligned_at(heap, size, alignment, 0); } -mi_decl_allocator void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_zalloc_aligned_at(heap, total, alignment, offset); } -mi_decl_allocator void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_restrict void* 
mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_calloc_aligned_at(heap,count,size,alignment,0); } -mi_decl_allocator void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_aligned_at(mi_get_default_heap(), size, alignment, offset); } -mi_decl_allocator void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_malloc_aligned(mi_get_default_heap(), size, alignment); } -mi_decl_allocator void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_zalloc_aligned_at(mi_get_default_heap(), size, alignment, offset); } -mi_decl_allocator void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_zalloc_aligned(mi_get_default_heap(), size, alignment); } -mi_decl_allocator void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_calloc_aligned_at(mi_get_default_heap(), count, size, alignment, offset); } -mi_decl_allocator void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_calloc_aligned(mi_get_default_heap(), count, size, alignment); } @@ -153,55 +153,55 @@ static void* 
mi_heap_realloc_zero_aligned(mi_heap_t* heap, void* p, size_t newsi return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,zero); } -mi_decl_allocator void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,false); } -mi_decl_allocator void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_zero_aligned(heap,p,newsize,alignment,false); } -mi_decl_allocator void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_zero_aligned_at(heap, p, newsize, alignment, offset, true); } -mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_zero_aligned(heap, p, newsize, alignment, true); } -mi_decl_allocator void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(newcount, size, &total)) return NULL; return mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset); } -mi_decl_allocator void* 
mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { +void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(newcount, size, &total)) return NULL; return mi_heap_rezalloc_aligned(heap, p, total, alignment); } -mi_decl_allocator void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); } -mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_aligned(mi_get_default_heap(), p, newsize, alignment); } -mi_decl_allocator void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_rezalloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); } -mi_decl_allocator void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_rezalloc_aligned(mi_get_default_heap(), p, newsize, alignment); } -mi_decl_allocator void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_recalloc_aligned_at(mi_get_default_heap(), p, newcount, size, alignment, offset); } -mi_decl_allocator void* mi_recalloc_aligned(void* p, size_t 
newcount, size_t size, size_t alignment) mi_attr_noexcept { +void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_recalloc_aligned(mi_get_default_heap(), p, newcount, size, alignment); } diff --git a/src/alloc-posix.c b/src/alloc-posix.c index ffc75373..c74b6082 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -55,24 +55,24 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept return 0; } -void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept { +mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept { void* p = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment)); mi_assert_internal(((uintptr_t)p % alignment) == 0); return p; } -void* mi_valloc(size_t size) mi_attr_noexcept { +mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept { return mi_malloc_aligned(size, _mi_os_page_size()); } -void* mi_pvalloc(size_t size) mi_attr_noexcept { +mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept { size_t psize = _mi_os_page_size(); if (size >= SIZE_MAX - psize) return NULL; // overflow size_t asize = ((size + psize - 1) / psize) * psize; return mi_malloc_aligned(asize, psize); } -void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept { +mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept { if (alignment==0 || !_mi_is_power_of_two(alignment)) return NULL; if ((size&(alignment-1)) != 0) return NULL; // C11 requires integral multiple, see void* p = (alignment <= MI_MAX_ALIGN_SIZE ? 
mi_malloc(size) : mi_malloc_aligned(size, alignment)); @@ -92,7 +92,7 @@ void* mi__expand(void* p, size_t newsize) mi_attr_noexcept { // Microsoft return res; } -unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept { +mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept { if (s==NULL) return NULL; size_t len; for(len = 0; s[len] != 0; len++) { } @@ -104,7 +104,7 @@ unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept { return p; } -unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept { +mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept { return (unsigned char*)mi_strdup((const char*)s); } diff --git a/src/alloc.c b/src/alloc.c index d2fbe4b1..b080e6fc 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -58,7 +58,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz } // allocate a small block -extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { +extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { mi_assert(heap!=NULL); mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local mi_assert(size <= MI_SMALL_SIZE_MAX); @@ -74,12 +74,12 @@ extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size return p; } -extern inline mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept { +extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept { return mi_heap_malloc_small(mi_get_default_heap(), size); } // The main allocation function -extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { +extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { return mi_heap_malloc_small(heap, size); } @@ -98,7 +98,7 
@@ extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t siz } } -extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept { +extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { return mi_heap_malloc(mi_get_default_heap(), size); } @@ -122,7 +122,7 @@ void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { } // zero initialized small block -mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept { +mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { void* p = mi_malloc_small(size); if (p != NULL) { _mi_block_zero_init(_mi_ptr_page(p), p, size); // todo: can we avoid getting the page again? @@ -138,11 +138,11 @@ void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) { return p; } -extern inline mi_decl_allocator void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { +extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { return _mi_heap_malloc_zero(heap, size, true); } -mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept { +mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept { return mi_heap_zalloc(mi_get_default_heap(),size); } @@ -523,29 +523,29 @@ void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { mi_free(p); } -extern inline mi_decl_allocator void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { +extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count,size,&total)) return NULL; return mi_heap_zalloc(heap,total); } -mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { +mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { return mi_heap_calloc(mi_get_default_heap(),count,size); } // Uninitialized `calloc` -extern mi_decl_allocator void* 
mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { +extern mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_malloc(heap, total); } -mi_decl_allocator void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { +mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { return mi_heap_mallocn(mi_get_default_heap(),count,size); } // Expand in place or fail -mi_decl_allocator void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { +void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { if (p == NULL) return NULL; size_t size = mi_usable_size(p); if (newsize > size) return NULL; @@ -571,11 +571,11 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) return newp; } -mi_decl_allocator void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { +void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { return _mi_heap_realloc_zero(heap, p, newsize, false); } -mi_decl_allocator void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { +void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_realloc(heap, p, total); @@ -583,41 +583,41 @@ mi_decl_allocator void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, // Reallocate but free `p` on errors -mi_decl_allocator void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { +void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { void* newp = mi_heap_realloc(heap, p, newsize); if (newp==NULL && p!=NULL) mi_free(p); return newp; } -mi_decl_allocator void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) 
mi_attr_noexcept { +void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { return _mi_heap_realloc_zero(heap, p, newsize, true); } -mi_decl_allocator void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { +void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_rezalloc(heap, p, total); } -mi_decl_allocator void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { +void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_realloc(mi_get_default_heap(),p,newsize); } -mi_decl_allocator void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { +void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { return mi_heap_reallocn(mi_get_default_heap(),p,count,size); } // Reallocate but free `p` on errors -mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { +void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_reallocf(mi_get_default_heap(),p,newsize); } -mi_decl_allocator void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { +void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_rezalloc(mi_get_default_heap(), p, newsize); } -mi_decl_allocator void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { +void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { return mi_heap_recalloc(mi_get_default_heap(), p, count, size); } @@ -628,7 +628,7 @@ mi_decl_allocator void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_ // ------------------------------------------------------ // `strdup` using mi_malloc -char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { +mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { if (s == NULL) return NULL; size_t n = strlen(s); 
char* t = (char*)mi_heap_malloc(heap,n+1); @@ -636,12 +636,12 @@ char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { return t; } -char* mi_strdup(const char* s) mi_attr_noexcept { +mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept { return mi_heap_strdup(mi_get_default_heap(), s); } // `strndup` using mi_malloc -char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { +mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { if (s == NULL) return NULL; size_t m = strlen(s); if (n > m) n = m; @@ -652,7 +652,7 @@ char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept return t; } -char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { +mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { return mi_heap_strndup(mi_get_default_heap(),s,n); } @@ -663,7 +663,7 @@ char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { #define PATH_MAX MAX_PATH #endif #include -char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { +mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { // todo: use GetFullPathNameW to allow longer file names char buf[PATH_MAX]; DWORD res = GetFullPathNameA(fname, PATH_MAX, (resolved_name == NULL ? 
buf : resolved_name), NULL); @@ -709,7 +709,7 @@ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) } #endif -char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { +mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { return mi_heap_realpath(mi_get_default_heap(),fname,resolved_name); } #endif @@ -774,19 +774,19 @@ static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow ) { return p; } -void* mi_new(size_t size) { +mi_decl_restrict void* mi_new(size_t size) { void* p = mi_malloc(size); if (mi_unlikely(p == NULL)) return mi_try_new(size,false); return p; } -void* mi_new_nothrow(size_t size) { +mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept { void* p = mi_malloc(size); if (mi_unlikely(p == NULL)) return mi_try_new(size, true); return p; } -void* mi_new_aligned(size_t size, size_t alignment) { +mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) { void* p; do { p = mi_malloc_aligned(size, alignment); @@ -795,7 +795,7 @@ void* mi_new_aligned(size_t size, size_t alignment) { return p; } -void* mi_new_aligned_nothrow(size_t size, size_t alignment) { +mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept { void* p; do { p = mi_malloc_aligned(size, alignment); @@ -804,7 +804,7 @@ void* mi_new_aligned_nothrow(size_t size, size_t alignment) { return p; } -void* mi_new_n(size_t count, size_t size) { +mi_decl_restrict void* mi_new_n(size_t count, size_t size) { size_t total; if (mi_unlikely(mi_count_size_overflow(count, size, &total))) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc diff --git a/test/main-override.cpp b/test/main-override.cpp index fcf3970f..490f1fb8 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -8,6 +8,25 @@ #include #include +#include +#include +#include + +// Issue #202 +void thread_main() { 
+ mi_heap_t* heap = mi_heap_new(); + void* q = mi_heap_malloc(heap,1024); + // mi_heap_delete(heap); // uncomment to prevent assertion +} + +int main() { + auto t1 = std::thread(thread_main); + t1.join(); + return 0; +} + +/* + static void* p = malloc(8); void free_p() { @@ -32,13 +51,13 @@ int main() { free(p1); p1 = malloc(8); char* s = mi_strdup("hello\n"); - /* - char* s = _strdup("hello\n"); - char* buf = NULL; - size_t len; - _dupenv_s(&buf,&len,"MIMALLOC_VERBOSE"); - mi_free(buf); - */ + + //char* s = _strdup("hello\n"); + //char* buf = NULL; + //size_t len; + //_dupenv_s(&buf,&len,"MIMALLOC_VERBOSE"); + //mi_free(buf); + mi_free(p2); p2 = malloc(16); p1 = realloc(p1, 32); @@ -84,4 +103,5 @@ bool test_stl_allocator2() { vec.push_back(some_struct()); vec.pop_back(); return vec.size() == 0; -} \ No newline at end of file +} +*/ \ No newline at end of file From 946a71c4a957ac3a74c1270be44dcf8b32e254ae Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 13 Feb 2020 11:37:48 -0800 Subject: [PATCH 283/293] fix issue #204 (and #205) by doing thread delayed free after absorbing the pages --- src/heap.c | 30 +++++++++---------- test/main-override.cpp | 66 ++++++++++++++++++++++++++++++++---------- 2 files changed, 64 insertions(+), 32 deletions(-) diff --git a/src/heap.c b/src/heap.c index 93275747..900cef65 100644 --- a/src/heap.c +++ b/src/heap.c @@ -312,33 +312,29 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { mi_assert_internal(heap!=NULL); if (from==NULL || from->page_count == 0) return; - // unfull all full pages in the `from` heap - mi_page_t* page = from->pages[MI_BIN_FULL].first; - while (page != NULL) { - mi_page_t* next = page->next; - _mi_page_unfull(page); - page = next; - } - mi_assert_internal(from->pages[MI_BIN_FULL].first == NULL); - - // free outstanding thread delayed free blocks + // reduce the size of the delayed frees _mi_heap_delayed_free(from); - - // transfer all pages by appending the queues; this will set - // a new heap field 
which is ok as all pages are unfull'd and thus - // other threads won't access this field anymore (see `mi_free_block_mt`) - for (size_t i = 0; i < MI_BIN_FULL; i++) { + + // transfer all pages by appending the queues; this will set a new heap field + // so threads may do delayed frees in either heap for a while. + for (size_t i = 0; i <= MI_BIN_FULL; i++) { mi_page_queue_t* pq = &heap->pages[i]; mi_page_queue_t* append = &from->pages[i]; size_t pcount = _mi_page_queue_append(heap, pq, append); heap->page_count += pcount; from->page_count -= pcount; } - mi_assert_internal(from->thread_delayed_free == NULL); mi_assert_internal(from->page_count == 0); + // and do outstanding delayed frees in the `from` heap + // note: be careful here as the `heap` field in all those pages no longer point to `from`, + // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a + // the regular `_mi_free_delayed_block` which is safe. + _mi_heap_delayed_free(from); + mi_assert_internal(from->thread_delayed_free == NULL); + // and reset the `from` heap - mi_heap_reset_pages(from); + mi_heap_reset_pages(from); } // Safe delete a heap without freeing any still allocated blocks in that heap. 
diff --git a/test/main-override.cpp b/test/main-override.cpp index 490f1fb8..957b7872 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -12,21 +12,27 @@ #include #include -// Issue #202 -void thread_main() { - mi_heap_t* heap = mi_heap_new(); - void* q = mi_heap_malloc(heap,1024); - // mi_heap_delete(heap); // uncomment to prevent assertion -} +#ifdef _WIN32 +#include +static void msleep(unsigned long msecs) { Sleep(msecs); } +#else +#include +static void msleep(unsigned long msecs) { usleep(msecs * 1000UL); } +#endif + +void heap_no_delete(); +void heap_late_free(); +void various_tests(); int main() { - auto t1 = std::thread(thread_main); - t1.join(); + mi_stats_reset(); // ignore earlier allocations + // heap_no_delete(); // issue #202 + // heap_late_free(); // issue #204 + various_tests(); + mi_stats_print(NULL); return 0; } -/* - static void* p = malloc(8); void free_p() { @@ -43,8 +49,7 @@ public: }; -int main() { - mi_stats_reset(); // ignore earlier allocations +void various_tests() { atexit(free_p); void* p1 = malloc(78); void* p2 = mi_malloc_aligned(16,24); @@ -68,8 +73,6 @@ int main() { delete t; t = new (std::nothrow) Test(42); delete t; - mi_stats_print(NULL); - return 0; } class Static { @@ -104,4 +107,37 @@ bool test_stl_allocator2() { vec.pop_back(); return vec.size() == 0; } -*/ \ No newline at end of file + + + +// Issue #202 +void heap_no_delete_worker() { + mi_heap_t* heap = mi_heap_new(); + void* q = mi_heap_malloc(heap,1024); + // mi_heap_delete(heap); // uncomment to prevent assertion +} + +void heap_no_delete() { + auto t1 = std::thread(heap_no_delete_worker); + t1.join(); +} + + +// Issue #204 +volatile void* global_p; + +void t1main() { + mi_heap_t* heap = mi_heap_new(); + global_p = mi_heap_malloc(heap, 1024); + mi_heap_delete(heap); +} + +void heap_late_free() { + auto t1 = std::thread(t1main); + + msleep(2000); + assert(global_p); + mi_free((void*)global_p); + + t1.join(); +} \ No newline at end of file From 
8a2a52843d36a361c3e9a42f37240cce5baab517 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 13 Feb 2020 12:15:23 -0800 Subject: [PATCH 284/293] delete all thread owned heaps when a thread is terminated (issue #202) --- include/mimalloc-types.h | 2 ++ src/heap.c | 21 ++++++++++++++++++++- src/init.c | 23 +++++++++++++++++++---- test/main-override.cpp | 2 +- 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 71f3ae80..dc85bbcd 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -329,6 +329,7 @@ struct mi_heap_s { uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation size_t page_count; // total number of pages in the `pages` queues. + mi_heap_t* next; // list of heaps per thread bool no_reclaim; // `true` if this heap should not reclaim abandoned pages }; @@ -469,6 +470,7 @@ struct mi_tld_s { unsigned long long heartbeat; // monotonic heartbeat count bool recurse; // true if deferred was called; used to prevent infinite recursion. mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) + mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) mi_segments_tld_t segments; // segment tld mi_os_tld_t os; // os tld mi_stats_t stats; // statistics diff --git a/src/heap.c b/src/heap.c index 900cef65..0bf26988 100644 --- a/src/heap.c +++ b/src/heap.c @@ -191,7 +191,7 @@ mi_heap_t* mi_heap_get_backing(void) { mi_heap_t* mi_heap_new(void) { mi_heap_t* bheap = mi_heap_get_backing(); - mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); + mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? 
if (heap==NULL) return NULL; memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; @@ -201,6 +201,9 @@ mi_heap_t* mi_heap_new(void) { heap->keys[0] = _mi_heap_random_next(heap); heap->keys[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe + // push on the thread local heaps list + heap->next = heap->tld->heaps; + heap->tld->heaps = heap; return heap; } @@ -230,6 +233,22 @@ static void mi_heap_free(mi_heap_t* heap) { if (mi_heap_is_default(heap)) { _mi_heap_set_default_direct(heap->tld->heap_backing); } + + // remove ourselves from the thread local heaps list + // linear search but we expect the number of heaps to be relatively small + mi_heap_t* prev = NULL; + mi_heap_t* curr = heap->tld->heaps; + while (curr != heap && curr != NULL) { + prev = curr; + curr = curr->next; + } + mi_assert_internal(curr == heap); + if (curr == heap) { + if (prev != NULL) { prev->next = heap->next; } + else { heap->tld->heaps = heap->next; } + } + mi_assert_internal(heap->tld->heaps != NULL); + // and free the used memory mi_free(heap); } diff --git a/src/init.c b/src/init.c index 2f5ca224..2c9dec1a 100644 --- a/src/init.c +++ b/src/init.c @@ -97,6 +97,7 @@ const mi_heap_t _mi_heap_empty = { { 0, 0 }, // keys { {0}, {0}, 0 }, 0, // page count + NULL, // next false }; @@ -111,7 +112,7 @@ extern mi_heap_t _mi_heap_main; static mi_tld_t tld_main = { 0, false, - &_mi_heap_main, + &_mi_heap_main, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os @@ -130,6 +131,7 @@ mi_heap_t _mi_heap_main = { { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) 
{ {0x846ca68b}, {0}, 0 }, // random 0, // page count + NULL, // next heap false // can reclaim }; @@ -192,6 +194,7 @@ static bool _mi_heap_init(void) { heap->keys[1] = _mi_heap_random_next(heap); heap->tld = tld; tld->heap_backing = heap; + tld->heaps = heap; tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; tld->os.stats = &tld->stats; @@ -207,12 +210,24 @@ static bool _mi_heap_done(mi_heap_t* heap) { // reset default heap _mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty); - // todo: delete all non-backing heaps? - - // switch to backing heap and free it + // switch to backing heap heap = heap->tld->heap_backing; if (!mi_heap_is_initialized(heap)) return false; + + // delete all non-backing heaps in this thread + mi_heap_t* curr = heap->tld->heaps; + while (curr != NULL) { + mi_heap_t* next = curr->next; // save `next` as `curr` will be freed + if (curr != heap) { + mi_assert_internal(!mi_heap_is_backing(curr)); + mi_heap_delete(curr); + } + curr = next; + } + mi_assert_internal(heap->tld->heaps == heap && heap->next == NULL); + mi_assert_internal(mi_heap_is_backing(heap)); + // collect if not the main thread if (heap != &_mi_heap_main) { _mi_heap_collect_abandon(heap); diff --git a/test/main-override.cpp b/test/main-override.cpp index 957b7872..b4ce4c1c 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -28,7 +28,7 @@ int main() { mi_stats_reset(); // ignore earlier allocations // heap_no_delete(); // issue #202 // heap_late_free(); // issue #204 - various_tests(); + // various_tests(); mi_stats_print(NULL); return 0; } From af37302e8327b610513a2762769bd23c006d8565 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 13 Feb 2020 12:27:06 -0800 Subject: [PATCH 285/293] add assertions --- src/heap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/heap.c b/src/heap.c index 0bf26988..b1771264 100644 --- a/src/heap.c +++ b/src/heap.c @@ -226,6 +226,7 @@ static void 
mi_heap_reset_pages(mi_heap_t* heap) { // called from `mi_heap_destroy` and `mi_heap_delete` to free the internal heap resources. static void mi_heap_free(mi_heap_t* heap) { + mi_assert(heap != NULL); mi_assert_internal(mi_heap_is_initialized(heap)); if (mi_heap_is_backing(heap)) return; // dont free the backing heap @@ -305,6 +306,7 @@ void _mi_heap_destroy_pages(mi_heap_t* heap) { } void mi_heap_destroy(mi_heap_t* heap) { + mi_assert(heap != NULL); mi_assert(mi_heap_is_initialized(heap)); mi_assert(heap->no_reclaim); mi_assert_expensive(mi_heap_is_valid(heap)); @@ -359,6 +361,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { // Safe delete a heap without freeing any still allocated blocks in that heap. void mi_heap_delete(mi_heap_t* heap) { + mi_assert(heap != NULL); mi_assert(mi_heap_is_initialized(heap)); mi_assert_expensive(mi_heap_is_valid(heap)); if (!mi_heap_is_initialized(heap)) return; From e981e9227eb0237da1ff3e2909b96c671c5c115a Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 13 Feb 2020 13:12:19 -0800 Subject: [PATCH 286/293] ensure thread delayed freeing is correct during heap_absorb; #204 --- src/heap.c | 4 +++- src/page-queue.c | 5 +++++ test/main-override.cpp | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/heap.c b/src/heap.c index b1771264..5d0d4b8a 100644 --- a/src/heap.c +++ b/src/heap.c @@ -338,6 +338,8 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { // transfer all pages by appending the queues; this will set a new heap field // so threads may do delayed frees in either heap for a while. 
+ // note: appending waits for each page to not be in the `MI_DELAYED_FREEING` state + // so after this only the new heap will get delayed frees for (size_t i = 0; i <= MI_BIN_FULL; i++) { mi_page_queue_t* pq = &heap->pages[i]; mi_page_queue_t* append = &from->pages[i]; @@ -351,7 +353,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { // note: be careful here as the `heap` field in all those pages no longer point to `from`, // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a // the regular `_mi_free_delayed_block` which is safe. - _mi_heap_delayed_free(from); + _mi_heap_delayed_free(from); mi_assert_internal(from->thread_delayed_free == NULL); // and reset the `from` heap diff --git a/src/page-queue.c b/src/page-queue.c index 68e2aaa4..b2687c92 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -329,6 +329,7 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_page_set_in_full(page, mi_page_queue_is_full(to)); } +// Only called from `mi_heap_absorb`. size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append) { mi_assert_internal(mi_heap_contains_queue(heap,pq)); mi_assert_internal(pq->block_size == append->block_size); @@ -339,6 +340,10 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue size_t count = 0; for (mi_page_t* page = append->first; page != NULL; page = page->next) { mi_page_set_heap(page,heap); + // set it to delayed free (not overriding NEVER_DELAYED_FREE) which has as a + // side effect that it spins until any DELAYED_FREEING is finished. This ensures + // that after appending only the new heap will be used for delayed free operations. 
+ _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false); count++; } diff --git a/test/main-override.cpp b/test/main-override.cpp index b4ce4c1c..957b7872 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -28,7 +28,7 @@ int main() { mi_stats_reset(); // ignore earlier allocations // heap_no_delete(); // issue #202 // heap_late_free(); // issue #204 - // various_tests(); + various_tests(); mi_stats_print(NULL); return 0; } From 67de2549cf8585250e17501e714c83a21746b20b Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 14 Feb 2020 09:40:56 -0800 Subject: [PATCH 287/293] fix build with clang-cl due to _Check_return_ (issue #200) --- include/mimalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index d1120e9f..f057c78d 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -26,9 +26,9 @@ terms of the MIT license. A copy of the license can be found in the file #if (__cplusplus >= 201703) #define mi_decl_nodiscard [[nodiscard]] -#elif (__GNUC__ >= 4) +#elif (__GNUC__ >= 4) || defined(__clang__) // includes clang, icc, and clang-cl #define mi_decl_nodiscard __attribute__((warn_unused_result)) -#elif (_MSC_VER >= 1700) +#elif (_MSC_VER >= 1700) #define mi_decl_nodiscard _Check_return_ #else #define mi_decl_nodiscard From 3e198cc87d7578f26b9dfe76731fc81a27687440 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 14 Feb 2020 11:11:57 -0800 Subject: [PATCH 288/293] fix too strict assertion (issue #204) --- src/page-queue.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/page-queue.c b/src/page-queue.c index b2687c92..ea213019 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -339,8 +339,10 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue // set append pages to new heap and count size_t count = 0; for (mi_page_t* page = append->first; page != NULL; page = page->next) { - mi_page_set_heap(page,heap); - // set it to delayed 
free (not overriding NEVER_DELAYED_FREE) which has as a + // inline `mi_page_set_heap` to avoid wrong assertion during absorption; + // in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive. + mi_atomic_write(&page->xheap, (uintptr_t)heap); + // set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a // side effect that it spins until any DELAYED_FREEING is finished. This ensures // that after appending only the new heap will be used for delayed free operations. _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false); From 16ebb70e4c8a342e6453148397217e604fe45ee4 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 17 Feb 2020 09:15:48 -0800 Subject: [PATCH 289/293] strengthen alignment guarantee (issue #206) (reverse commit 4531367) --- src/alloc-aligned.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 85408868..8be2e598 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -20,8 +20,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t mi_assert(alignment > 0 && alignment % sizeof(void*) == 0); if (mi_unlikely(size > PTRDIFF_MAX)) return NULL; // we don't allocate more than PTRDIFF_MAX (see ) - if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) return NULL; // require power-of-two (see ) - if (alignment <= MI_MAX_ALIGN_SIZE && offset==0) return _mi_heap_malloc_zero(heap, size, zero); + if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) return NULL; // require power-of-two (see ) const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` // try if there is a small block available with just the right alignment From f2ac272baaba126745a70b92bf0b8887fd3aedd6 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 17 Feb 2020 09:59:11 -0800 Subject: [PATCH 290/293] strengthen alignment check for memalign and aligned_alloc --- 
include/mimalloc-internal-tld.h | 722 ++++++++++++++++++++++++++++++++ include/mimalloc-internal.h | 4 + src/alloc-aligned.c | 2 +- src/alloc-posix.c | 10 +- 4 files changed, 732 insertions(+), 6 deletions(-) create mode 100644 include/mimalloc-internal-tld.h diff --git a/include/mimalloc-internal-tld.h b/include/mimalloc-internal-tld.h new file mode 100644 index 00000000..ce67b0c7 --- /dev/null +++ b/include/mimalloc-internal-tld.h @@ -0,0 +1,722 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_INTERNAL_TLD_H +#define MIMALLOC_INTERNAL_TLD_H + +#include "mimalloc-types.h" +#include "mimalloc-internal.h" + +#define MI_TLD_DECL 1 // thread local declaration +#define MI_TLD_PTHREAD 2 // ptrhead_get/setspecific +#define MI_TLD_DECL_GUARD 3 // thread local + recursion guard at initial load +#define MI_TLD_PTHREAD_GUARD 4 // ptrhead_get/setspecific + recursion guard at initial load +#define MI_TLD_SLOT 5 // steal slot from OS thread local predefined slots +#define MI_TLD_PTHREAD_SLOT 6 // steal slot from pthread structure (usually `retval`) + + +#if !defined(MI_TLD) +#if defined(_MSC_VER) || defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) + // on windows and linux/freeBSD/netBSD (with initial-exec) a __thread always works without recursion into malloc + #define MI_TLD MI_TLD_DECL +#elif !defined(MI_MIMALLOC_OVERRIDE) + // if not overriding, __thread declarations should be fine (use MI_TLD_PTHREAD if your OS does not have __thread) + #define MI_TLD MI_TLD_DECL +#elif // defined(MI_MALLOC_OVERRIDE) + // if overriding, some BSD variants allocate when accessing a 
thread local the first time + #if defined(__APPLE__) + #define MI_TLD MI_TLD_SLOT + #define MI_TLD_SLOT_NUM 89 // seems unused? (__PTK_FRAMEWORK_OLDGC_KEY9) see + // possibly unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) + // #define MI_TLD MI_TLD_PTHREAD_SLOT + // #define MI_TLD_PTHREAD_SLOT_OFS (2*sizeof(void*) + sizeof(long) + 2*sizeof(void*) /*TAILQ*/) // offset `tl_exit_value` + #elif defined(__OpenBSD__) + #define MI_TLD MI_TLD_PTHREAD_SLOT + #define MI_TLD_PTHREAD_SLOT_OFS (6*sizeof(int) + 1*sizeof(void*)) // offset `retval` + #elif defined(__DragonFly__) + #define MI_TLD MI_TLD_PTHREAD_SLOT + #define MI_TLD_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) + #endif + #endif +#endif + +#if (MI_DEBUG>0) +#define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) +#else +#define mi_trace_message(...) +#endif + +#define MI_CACHE_LINE 64 +#if defined(_MSC_VER) +#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) +#define mi_decl_noinline __declspec(noinline) +#define mi_decl_thread __declspec(thread) +#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) +#elif (defined(__GNUC__) && (__GNUC__>=3)) // includes clang and icc +#define mi_decl_noinline __attribute__((noinline)) +#define mi_decl_thread __thread +#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) +#else +#define mi_decl_noinline +#define mi_decl_thread __thread // hope for the best :-) +#define mi_decl_cache_align +#endif + + +// "options.c" +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); +void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...); +void _mi_warning_message(const char* fmt, ...); +void _mi_verbose_message(const char* fmt, ...); +void _mi_trace_message(const char* fmt, ...); +void _mi_options_init(void); +void _mi_error_message(int err, const char* fmt, 
...); + +// random.c +void _mi_random_init(mi_random_ctx_t* ctx); +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); +uintptr_t _mi_random_next(mi_random_ctx_t* ctx); +uintptr_t _mi_heap_random_next(mi_heap_t* heap); +uintptr_t _os_random_weak(uintptr_t extra_seed); +static inline uintptr_t _mi_random_shuffle(uintptr_t x); + +// init.c +extern mi_stats_t _mi_stats_main; +extern const mi_page_t _mi_page_empty; +bool _mi_is_main_thread(void); +bool _mi_preloading(); // true while the C runtime is not ready + +// os.c +size_t _mi_os_page_size(void); +void _mi_os_init(void); // called from process init +void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data +void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data +size_t _mi_os_good_alloc_size(size_t size); + +// memory.c +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); +void _mi_mem_free(void* p, size_t size, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); + +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); +bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); +bool _mi_mem_protect(void* addr, size_t size); +bool _mi_mem_unprotect(void* addr, size_t size); + +void _mi_mem_collect(mi_os_tld_t* tld); + +// "segment.c" +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); +void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); +void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page +void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, 
mi_block_t* block); + +void _mi_segment_thread_collect(mi_segments_tld_t* tld); +void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); +void _mi_abandoned_await_readers(void); + + + +// "page.c" +void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc; + +void _mi_page_retire(mi_page_t* page); // free the page if there are no other pages with many free blocks +void _mi_page_unfull(mi_page_t* page); +void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page +void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... +void _mi_heap_delayed_free(mi_heap_t* heap); +void _mi_heap_collect_retired(mi_heap_t* heap, bool force); + +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); +size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); +void _mi_deferred_free(mi_heap_t* heap, bool force); + +void _mi_page_free_collect(mi_page_t* page,bool force); +void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments + +size_t _mi_bin_size(uint8_t bin); // for stats +uint8_t _mi_bin(size_t size); // for stats +uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD in "os.c" + +// "heap.c" +void _mi_heap_destroy_pages(mi_heap_t* heap); +void _mi_heap_collect_abandon(mi_heap_t* heap); +void _mi_heap_set_default_direct(mi_heap_t* heap); + +// "stats.c" +void _mi_stats_done(mi_stats_t* stats); + +mi_msecs_t _mi_clock_now(void); +mi_msecs_t _mi_clock_end(mi_msecs_t start); +mi_msecs_t _mi_clock_start(void); + +// "alloc.c" +void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic` +void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero); +void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero); +mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* 
segment, const mi_page_t* page, const void* p); +bool _mi_free_delayed_block(mi_block_t* block); +void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size); + +#if MI_DEBUG>1 +bool _mi_page_is_valid(mi_page_t* page); +#endif + + +// ------------------------------------------------------ +// Branches +// ------------------------------------------------------ + +#if defined(__GNUC__) || defined(__clang__) +#define mi_unlikely(x) __builtin_expect((x),0) +#define mi_likely(x) __builtin_expect((x),1) +#else +#define mi_unlikely(x) (x) +#define mi_likely(x) (x) +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + + +/* ----------------------------------------------------------- + Error codes passed to `_mi_fatal_error` + All are recoverable but EFAULT is a serious error and aborts by default in secure mode. + For portability define undefined error codes using common Unix codes: + +----------------------------------------------------------- */ +#include <errno.h> +#ifndef EAGAIN // double free +#define EAGAIN (11) +#endif +#ifndef ENOMEM // out of memory +#define ENOMEM (12) +#endif +#ifndef EFAULT // corrupted free-list or meta-data +#define EFAULT (14) +#endif +#ifndef EINVAL // trying to free an invalid pointer +#define EINVAL (22) +#endif +#ifndef EOVERFLOW // count*size overflow +#define EOVERFLOW (75) +#endif + + +/* ----------------------------------------------------------- + Inlined definitions +----------------------------------------------------------- */ +#define UNUSED(x) (void)(x) +#if (MI_DEBUG>0) +#define UNUSED_RELEASE(x) +#else +#define UNUSED_RELEASE(x) UNUSED(x) +#endif + +#define MI_INIT4(x) x(),x(),x(),x() +#define MI_INIT8(x) MI_INIT4(x),MI_INIT4(x) +#define MI_INIT16(x) MI_INIT8(x),MI_INIT8(x) +#define MI_INIT32(x) MI_INIT16(x),MI_INIT16(x) +#define MI_INIT64(x) MI_INIT32(x),MI_INIT32(x) +#define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x) +#define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) + + +// Is `x` a power of two? 
(0 is considered a power of two) +static inline bool _mi_is_power_of_two(uintptr_t x) { + return ((x & (x - 1)) == 0); +} + +// Align upwards +static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { + mi_assert_internal(alignment != 0); + uintptr_t mask = alignment - 1; + if ((alignment & mask) == 0) { // power of two? + return ((sz + mask) & ~mask); + } + else { + return (((sz + mask)/alignment)*alignment); + } +} + +// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. +static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { + mi_assert_internal(divider != 0); + return (divider == 0 ? size : ((size + divider - 1) / divider)); +} + +// Is memory zero initialized? +static inline bool mi_mem_is_zero(void* p, size_t size) { + for (size_t i = 0; i < size; i++) { + if (((uint8_t*)p)[i] != 0) return false; + } + return true; +} + +// Align a byte size to a size in _machine words_, +// i.e. byte size == `wsize*sizeof(void*)`. +static inline size_t _mi_wsize_from_size(size_t size) { + mi_assert_internal(size <= SIZE_MAX - sizeof(uintptr_t)); + return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); +} + + +// Overflow detecting multiply +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { +#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 +#include <limits.h> // UINT_MAX, ULONG_MAX +#if (SIZE_MAX == UINT_MAX) + return __builtin_umul_overflow(count, size, total); +#elif (SIZE_MAX == ULONG_MAX) + return __builtin_umull_overflow(count, size, total); +#else + return __builtin_umulll_overflow(count, size, total); +#endif +#else /* __builtin_umul_overflow is unavailable */ + #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) + *total = count * size; + return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) + && size > 0 && (SIZE_MAX / size) < count); +#endif +} + +// Safe multiply `count*size` into `total`; return `true` on overflow. 
+static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) { + if (count==1) { // quick check for the case where count is one (common for C++ allocators) + *total = size; + return false; + } + else if (mi_unlikely(mi_mul_overflow(count, size, total))) { + _mi_error_message(EOVERFLOW, "allocation request too large (%zu * %zu bytes)\n", count, size); + *total = SIZE_MAX; + return true; + } + else return false; +} + + +/* ----------------------------------------------------------- + The thread local default heap +----------------------------------------------------------- */ + +extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap +extern mi_heap_t _mi_heap_main; // statically allocated main backing heap +extern bool _mi_process_is_initialized; + +#if defined(MI_TLS_OSX_FAST) +#define MI_TLS_OSX_OFFSET (MI_TLS_OSX_SLOT*sizeof(void*)) +static inline void* mi_tls_osx_fast_get(void) { + void* ret; + __asm__("mov %%gs:%1, %0" : "=r" (ret) : "m" (*(void**)(MI_TLS_OSX_OFFSET))); + return ret; +} +static inline void mi_tls_osx_fast_set(void* value) { + __asm__("movq %1,%%gs:%0" : "=m" (*(void**)(MI_TLS_OSX_OFFSET)) : "rn" (value)); +} +#elif defined(MI_TLS_PTHREADS) +extern pthread_key_t _mi_heap_default_key; +#else +extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +#endif + + +static inline mi_heap_t* mi_get_default_heap(void) { +#if defined(MI_TLS_OSX_FAST) + // Use a fixed slot in the TSD on MacOSX to avoid recursion (since the loader calls malloc). + // We use slot 94 (__PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4) + // which seems unused except for the more recent Webkit + // Use with care. + mi_heap_t* heap = (mi_heap_t*)mi_tls_osx_fast_get(); + return (mi_unlikely(heap == NULL) ? 
(mi_heap_t*)&_mi_heap_empty : heap); +#elif defined(MI_TLS_PTHREADS) + // Use pthreads for TLS; this is used on macOSX with interpose as the loader calls `malloc` + // to allocate TLS storage leading to recursive calls if __thread declared variables are accessed. + // Using pthreads allows us to initialize without recursive calls. (performance seems still quite good). + mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? (mi_heap_t*)&_mi_heap_empty : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); + return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); +#else + #if defined(MI_TLS_RECURSE_GUARD) + // On some BSD platforms, like openBSD, the dynamic loader calls `malloc` + // to initialize thread local data (before our module is loaded). + // To avoid recursion, we need to avoid accessing the thread local `_mi_default_heap` + // until our module is loaded and use the statically allocated main heap until that time. + // TODO: patch ourselves dynamically to avoid this check every time? 
+ // if (mi_unlikely(!_mi_process_is_initialized)) return &_mi_heap_main; + #endif + return _mi_heap_default; +#endif +} + +static inline bool mi_heap_is_default(const mi_heap_t* heap) { + return (heap == mi_get_default_heap()); +} + +static inline bool mi_heap_is_backing(const mi_heap_t* heap) { + return (heap->tld->heap_backing == heap); +} + +static inline bool mi_heap_is_initialized(mi_heap_t* heap) { + mi_assert_internal(heap != NULL); + return (heap != &_mi_heap_empty); +} + +static inline uintptr_t _mi_ptr_cookie(const void* p) { + mi_assert_internal(_mi_heap_main.cookie != 0); + return ((uintptr_t)p ^ _mi_heap_main.cookie); +} + +/* ----------------------------------------------------------- + Pages +----------------------------------------------------------- */ + +static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) { + mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE)); + const size_t idx = _mi_wsize_from_size(size); + mi_assert_internal(idx < MI_PAGES_DIRECT); + return heap->pages_free_direct[idx]; +} + +// Get the page belonging to a certain size class +static inline mi_page_t* _mi_get_free_small_page(size_t size) { + return _mi_heap_get_free_small_page(mi_get_default_heap(), size); +} + +// Segment that contains the pointer +static inline mi_segment_t* _mi_ptr_segment(const void* p) { + // mi_assert_internal(p != NULL); + return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK); +} + +// Segment belonging to a page +static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { + mi_segment_t* segment = _mi_ptr_segment(page); + mi_assert_internal(segment == NULL || page == &segment->pages[page->segment_idx]); + return segment; +} + +// used internally +static inline uintptr_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) { + // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages + ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; + 
mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE); + uintptr_t idx = (uintptr_t)diff >> segment->page_shift; + mi_assert_internal(idx < segment->capacity); + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); + return idx; +} + +// Get the page containing the pointer +static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { + uintptr_t idx = _mi_segment_page_idx_of(segment, p); + return &((mi_segment_t*)segment)->pages[idx]; +} + +// Quick page start for initialized pages +static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { + const size_t bsize = page->xblock_size; + mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0); + return _mi_segment_page_start(segment, page, bsize, page_size, NULL); +} + +// Get the page containing the pointer +static inline mi_page_t* _mi_ptr_page(void* p) { + return _mi_segment_page_of(_mi_ptr_segment(p), p); +} + +// Get the block size of a page (special cased for huge objects) +static inline size_t mi_page_block_size(const mi_page_t* page) { + const size_t bsize = page->xblock_size; + mi_assert_internal(bsize > 0); + if (mi_likely(bsize < MI_HUGE_BLOCK_SIZE)) { + return bsize; + } + else { + size_t psize; + _mi_segment_page_start(_mi_page_segment(page), page, bsize, &psize, NULL); + return psize; + } +} + +// Get the usable block size of a page without fixed padding. +// This may still include internal padding due to alignment and rounding up size classes. 
+static inline size_t mi_page_usable_block_size(const mi_page_t* page) { + return mi_page_block_size(page) - MI_PADDING_SIZE; +} + + +// Thread free access +static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { + return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3); +} + +static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) { + return (mi_delayed_t)(mi_atomic_read_relaxed(&page->xthread_free) & 3); +} + +// Heap access +static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { + return (mi_heap_t*)(mi_atomic_read_relaxed(&page->xheap)); +} + +static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { + mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); + mi_atomic_write(&page->xheap,(uintptr_t)heap); +} + +// Thread free flag helpers +static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) { + return (mi_block_t*)(tf & ~0x03); +} +static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) { + return (mi_delayed_t)(tf & 0x03); +} +static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) { + return (mi_thread_free_t)((uintptr_t)block | (uintptr_t)delayed); +} +static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) { + return mi_tf_make(mi_tf_block(tf),delayed); +} +static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) { + return mi_tf_make(block, mi_tf_delayed(tf)); +} + +// are all blocks in a page freed? +// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`. +static inline bool mi_page_all_free(const mi_page_t* page) { + mi_assert_internal(page != NULL); + return (page->used == 0); +} + +// are there any available blocks? 
+static inline bool mi_page_has_any_available(const mi_page_t* page) { + mi_assert_internal(page != NULL && page->reserved > 0); + return (page->used < page->reserved || (mi_page_thread_free(page) != NULL)); +} + +// are there immediately available blocks, i.e. blocks available on the free list. +static inline bool mi_page_immediate_available(const mi_page_t* page) { + mi_assert_internal(page != NULL); + return (page->free != NULL); +} + +// is more than 7/8th of a page in use? +static inline bool mi_page_mostly_used(const mi_page_t* page) { + if (page==NULL) return true; + uint16_t frac = page->reserved / 8U; + return (page->reserved - page->used <= frac); +} + +static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { + return &((mi_heap_t*)heap)->pages[_mi_bin(size)]; +} + + + +//----------------------------------------------------------- +// Page flags +//----------------------------------------------------------- +static inline bool mi_page_is_in_full(const mi_page_t* page) { + return page->flags.x.in_full; +} + +static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) { + page->flags.x.in_full = in_full; +} + +static inline bool mi_page_has_aligned(const mi_page_t* page) { + return page->flags.x.has_aligned; +} + +static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { + page->flags.x.has_aligned = has_aligned; +} + + +/* ------------------------------------------------------------------- +Encoding/Decoding the free list next pointers + +This is to protect against buffer overflow exploits where the +free list is mutated. Many hardened allocators xor the next pointer `p` +with a secret key `k1`, as `p^k1`. This prevents overwriting with known +values but might be still too weak: if the attacker can guess +the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). 
+Moreover, if multiple blocks can be read as well, the attacker can +xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot +about the pointers (and subsequently `k1`). + +Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<> (MI_INTPTR_BITS - shift))); +} +static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { + shift %= MI_INTPTR_BITS; + return ((x >> shift) | (x << (MI_INTPTR_BITS - shift))); +} + +static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) { + void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]); + return (mi_unlikely(p==null) ? NULL : p); +} + +static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) { + uintptr_t x = (uintptr_t)(mi_unlikely(p==NULL) ? null : p); + return mi_rotl(x ^ keys[1], keys[0]) + keys[0]; +} + +static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) { + #ifdef MI_ENCODE_FREELIST + return (mi_block_t*)mi_ptr_decode(null, block->next, keys); + #else + UNUSED(keys); UNUSED(null); + return (mi_block_t*)block->next; + #endif +} + +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { + #ifdef MI_ENCODE_FREELIST + block->next = mi_ptr_encode(null, next, keys); + #else + UNUSED(keys); UNUSED(null); + block->next = (mi_encoded_t)next; + #endif +} + +static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { + #ifdef MI_ENCODE_FREELIST + mi_block_t* next = mi_block_nextx(page,block,page->keys); + // check for free list corruption: is `next` at least in the same page? + // TODO: check if `next` is `page->block_size` aligned? 
+ if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { + _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); + next = NULL; + } + return next; + #else + UNUSED(page); + return mi_block_nextx(page,block,NULL); + #endif +} + +static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { + #ifdef MI_ENCODE_FREELIST + mi_block_set_nextx(page,block,next, page->keys); + #else + UNUSED(page); + mi_block_set_nextx(page,block,next,NULL); + #endif +} + +// ------------------------------------------------------------------- +// Fast "random" shuffle +// ------------------------------------------------------------------- + +static inline uintptr_t _mi_random_shuffle(uintptr_t x) { + if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros +#if (MI_INTPTR_SIZE==8) + // by Sebastiano Vigna, see: + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9UL; + x ^= x >> 27; + x *= 0x94d049bb133111ebUL; + x ^= x >> 31; +#elif (MI_INTPTR_SIZE==4) + // by Chris Wellons, see: + x ^= x >> 16; + x *= 0x7feb352dUL; + x ^= x >> 15; + x *= 0x846ca68bUL; + x ^= x >> 16; +#endif + return x; +} + +// ------------------------------------------------------------------- +// Optimize numa node access for the common case (= one node) +// ------------------------------------------------------------------- + +int _mi_os_numa_node_get(mi_os_tld_t* tld); +size_t _mi_os_numa_node_count_get(void); + +extern size_t _mi_numa_node_count; +static inline int _mi_os_numa_node(mi_os_tld_t* tld) { + if (mi_likely(_mi_numa_node_count == 1)) return 0; + else return _mi_os_numa_node_get(tld); +} +static inline size_t _mi_os_numa_node_count(void) { + if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count; + else return _mi_os_numa_node_count_get(); +} + + +// ------------------------------------------------------------------- +// Getting the thread id should be 
performant +// as it is called in the fast path of `_mi_free`, +// so we specialize for various platforms. +// ------------------------------------------------------------------- +#if defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { + // Windows: works on Intel and ARM in both 32- and 64-bit + return (uintptr_t)NtCurrentTeb(); +} +#elif (defined(__GNUC__) || defined(__clang__)) && \ + (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)) +// TLS register on x86 is in the FS or GS register +// see: https://akkadia.org/drepper/tls.pdf +static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { + uintptr_t tid; + #if defined(__i386__) + __asm__("movl %%gs:0, %0" : "=r" (tid) : : ); // 32-bit always uses GS + #elif defined(__MACH__) + __asm__("movq %%gs:0, %0" : "=r" (tid) : : ); // x86_64 macOS uses GS + #elif defined(__x86_64__) + __asm__("movq %%fs:0, %0" : "=r" (tid) : : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + asm volatile ("mrc p15, 0, %0, c13, c0, 3" : "=r" (tid)); + #elif defined(__aarch64__) + asm volatile ("mrs %0, tpidr_el0" : "=r" (tid)); + #endif + return tid; +} +#else +// otherwise use standard C +static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { + return (uintptr_t)&_mi_heap_default; +} +#endif + + +#endif diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cea6b9c3..d0c0b3f3 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -232,6 +232,10 @@ static inline size_t _mi_wsize_from_size(size_t size) { return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); } +// Does malloc satisfy the alignment constraints already? 
+static inline bool mi_malloc_satisfies_alignment(size_t alignment, size_t size) { + return (alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2))); +} // Overflow detecting multiply static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 8be2e598..7eeb9e92 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -20,7 +20,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t mi_assert(alignment > 0 && alignment % sizeof(void*) == 0); if (mi_unlikely(size > PTRDIFF_MAX)) return NULL; // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) - if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) return NULL; // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>) + if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) return NULL; // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>) const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` // try if there is a small block available with just the right alignment diff --git a/src/alloc-posix.c b/src/alloc-posix.c index c74b6082..4395893b 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -48,7 +48,7 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept if (p == NULL) return EINVAL; if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment if (!_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2 - void* q = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment)); + void* q = (mi_malloc_satisfies_alignment(alignment, size) ? 
mi_malloc(size) : mi_malloc_aligned(size, alignment)); if (q==NULL && size != 0) return ENOMEM; mi_assert_internal(((uintptr_t)q % alignment) == 0); *p = q; @@ -56,26 +56,26 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept } mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept { - void* p = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment)); + void* p = (mi_malloc_satisfies_alignment(alignment,size) ? mi_malloc(size) : mi_malloc_aligned(size, alignment)); mi_assert_internal(((uintptr_t)p % alignment) == 0); return p; } mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept { - return mi_malloc_aligned(size, _mi_os_page_size()); + return mi_memalign( _mi_os_page_size(), size ); } mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept { size_t psize = _mi_os_page_size(); if (size >= SIZE_MAX - psize) return NULL; // overflow - size_t asize = ((size + psize - 1) / psize) * psize; + size_t asize = _mi_align_up(size, psize); return mi_malloc_aligned(asize, psize); } mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept { if (alignment==0 || !_mi_is_power_of_two(alignment)) return NULL; if ((size&(alignment-1)) != 0) return NULL; // C11 requires integral multiple, see - void* p = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment)); + void* p = (mi_malloc_satisfies_alignment(alignment, size) ? 
mi_malloc(size) : mi_malloc_aligned(size, alignment)); mi_assert_internal(((uintptr_t)p % alignment) == 0); return p; } From baf08e8d546b52e3c6773582230aa3f0a6f539f2 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 17 Feb 2020 09:59:34 -0800 Subject: [PATCH 291/293] fix size check on overflow when padding is enabled in debug mode --- src/alloc.c | 4 ++-- src/page.c | 11 +++++++---- test/test-api.c | 42 ++++++++++++++++++++++++++++-------------- 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index b080e6fc..b1c4cd34 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -25,7 +25,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* block = page->free; if (mi_unlikely(block == NULL)) { - return _mi_malloc_generic(heap, size); // slow path + return _mi_malloc_generic(heap, size); } mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); // pop from the free list @@ -86,7 +86,7 @@ extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size else { mi_assert(heap!=NULL); mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local - void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); + void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); // note: size can overflow but it is detected in malloc_generic mi_assert_internal(p == NULL || mi_usable_size(p) >= size); #if MI_STAT>1 if (p != NULL) { diff --git a/src/page.c b/src/page.c index 6aaef428..ef8a69e5 100644 --- a/src/page.c +++ b/src/page.c @@ -7,7 +7,7 @@ terms of the MIT license. A copy of the license can be found in the file /* ----------------------------------------------------------- The core of the allocator. Every segment contains - pages of a certain block size. The main function + pages of a {certain block size. The main function exported is `mi_malloc_generic`. 
----------------------------------------------------------- */ @@ -774,6 +774,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { // Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed. +// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept { mi_assert_internal(heap != NULL); @@ -793,9 +794,10 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept // huge allocation? mi_page_t* page; - if (mi_unlikely(size > MI_LARGE_OBJ_SIZE_MAX)) { - if (mi_unlikely(size > PTRDIFF_MAX)) { // we don't allocate more than PTRDIFF_MAX (see ) - _mi_error_message(EOVERFLOW, "allocation request is too large (%zu b requested)\n", size); + const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` + if (mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) )) { + if (mi_unlikely(req_size > PTRDIFF_MAX)) { // we don't allocate more than PTRDIFF_MAX (see ) + _mi_error_message(EOVERFLOW, "allocation request is too large (%zu b requested)\n", req_size); return NULL; } else { @@ -804,6 +806,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept } else { // otherwise find a page with free blocks in our size segregated queues + mi_assert_internal(size >= MI_PADDING_SIZE); page = mi_find_free_page(heap,size); } if (mi_unlikely(page == NULL)) { // out of memory diff --git a/test/test-api.c b/test/test-api.c index 2d26e14d..166cfca6 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -31,7 +31,7 @@ we therefore test the API over various inputs. 
Please add more tests :-) #endif #include "mimalloc.h" -#include "mimalloc-internal.h" +// #include "mimalloc-internal.h" // --------------------------------------------------------------------------- // Test macros: CHECK(name,predicate) and CHECK_BODY(name,body) @@ -98,38 +98,34 @@ int main() { // --------------------------------------------------- // Extended - // --------------------------------------------------- - #if defined(MI_MALLOC_OVERRIDE) && !defined(_WIN32) + // --------------------------------------------------- CHECK_BODY("posix_memalign1", { void* p = &p; - int err = posix_memalign(&p, sizeof(void*), 32); - mi_assert((err==0 && (uintptr_t)p % sizeof(void*) == 0) || p==&p); + int err = mi_posix_memalign(&p, sizeof(void*), 32); + result = ((err==0 && (uintptr_t)p % sizeof(void*) == 0) || p==&p); mi_free(p); - result = (err==0); }); CHECK_BODY("posix_memalign_no_align", { void* p = &p; - int err = posix_memalign(&p, 3, 32); - mi_assert(p==&p); - result = (err==EINVAL); + int err = mi_posix_memalign(&p, 3, 32); + result = (err==EINVAL && p==&p); }); CHECK_BODY("posix_memalign_zero", { void* p = &p; - int err = posix_memalign(&p, sizeof(void*), 0); + int err = mi_posix_memalign(&p, sizeof(void*), 0); mi_free(p); result = (err==0); }); CHECK_BODY("posix_memalign_nopow2", { void* p = &p; - int err = posix_memalign(&p, 3*sizeof(void*), 32); + int err = mi_posix_memalign(&p, 3*sizeof(void*), 32); result = (err==EINVAL && p==&p); }); CHECK_BODY("posix_memalign_nomem", { void* p = &p; - int err = posix_memalign(&p, sizeof(void*), SIZE_MAX); + int err = mi_posix_memalign(&p, sizeof(void*), SIZE_MAX); result = (err==ENOMEM && p==&p); }); - #endif // --------------------------------------------------- // Aligned API @@ -147,12 +143,30 @@ int main() { mi_free(p1); result = (result1&&result2); }); + CHECK_BODY("malloc-aligned4", { + void* p; + bool ok = true; + for (int i = 0; i < 8 && ok; i++) { + p = mi_malloc_aligned(8, 16); + ok = (p != NULL && (uintptr_t)(p) 
% 16 == 0); mi_free(p); + } + result = ok; + }); CHECK_BODY("malloc-aligned-at1", { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); }); CHECK_BODY("malloc-aligned-at2", { void* p = mi_malloc_aligned_at(50,32,8); result = (p != NULL && ((uintptr_t)(p) + 8) % 32 == 0); mi_free(p); - }); + }); + CHECK_BODY("memalign1", { + void* p; + bool ok = true; + for (int i = 0; i < 8 && ok; i++) { + p = mi_memalign(16,8); + ok = (p != NULL && (uintptr_t)(p) % 16 == 0); mi_free(p); + } + result = ok; + }); // --------------------------------------------------- // Heaps From 82684042be1be44d34caecc915fb51755278d843 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 17 Feb 2020 10:10:22 -0800 Subject: [PATCH 292/293] bump version to 1.6.1 --- include/mimalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index f057c78d..85f25ffb 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 160 // major + 2 digits minor +#define MI_MALLOC_VERSION 161 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 6e1ca96a4965c776c10698c24dae576523178ef5 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 17 Feb 2020 10:19:29 -0800 Subject: [PATCH 293/293] Update readme.md --- readme.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/readme.md b/readme.md index e4e96ba7..423c91b9 100644 --- a/readme.md +++ b/readme.md @@ -11,7 +11,7 @@ mimalloc (pronounced "me-malloc") is a general purpose allocator with excellent [performance](#performance) characteristics. Initially developed by Daan Leijen for the run-time systems of the [Koka](https://github.com/koka-lang/koka) and [Lean](https://github.com/leanprover/lean) languages. 
-Latest release:`v1.6.0` (2020-02-09). +Latest release:`v1.6.1` (2020-02-17). It is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: @@ -57,6 +57,7 @@ Enjoy! ### Releases +* 2020-02-17, `v1.6.1`: stable release 1.6: minor updates (build with clang-cl, fix alignment issue for small objects). * 2020-02-09, `v1.6.0`: stable release 1.6: fixed potential memory leak, improved overriding and thread local support on FreeBSD, NetBSD, DragonFly, and macOSX. New byte-precise heap block overflow detection in debug mode (besides the double-free detection and free-list @@ -275,8 +276,7 @@ resolved to the _mimalloc_ library. Note that certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). -Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is -actively worked on to fix this (see issue [`#50`](https://github.com/microsoft/mimalloc/issues/50)). +(Note: macOS support for dynamic overriding is recent, please report any issues.) ### Override on Windows