Compare commits

...

269 commits

Author SHA1 Message Date
Daan Leijen
e394e340e4 Merge branch 'dev' into dev3 2025-01-03 18:11:11 -08:00
Daan Leijen
e14c8fc795 bump version to 3.0.0 2025-01-03 18:08:34 -08:00
Daan Leijen
07bf4eea26 merge from dev 2025-01-03 18:07:01 -08:00
Daan Leijen
c95d9865a8 merge from dev3-bin 2025-01-03 14:27:18 -08:00
Daan Leijen
03d816d7be Merge branch 'dev3' into dev3-bin 2025-01-03 14:26:44 -08:00
Daan Leijen
6099f76c8c nicer logic in free 2025-01-03 14:26:32 -08:00
daanx
b432f77bfc Merge branch 'dev3' into dev3-bin 2025-01-03 13:50:37 -08:00
daanx
f6c2550eac fix enable large pages 2025-01-03 13:50:31 -08:00
Daan Leijen
b6adbbca0c combine flags and xthread_id 2025-01-03 13:15:46 -08:00
Daan Leijen
3c43225c1f fix initialization warning on gcc 2025-01-03 08:51:02 -08:00
Daan Leijen
281a513642 fix initialization warning on gcc 2025-01-03 08:48:06 -08:00
Daan Leijen
bbd7a492f0 fix signedness warning 2025-01-03 08:46:30 -08:00
Daan Leijen
7e539cc353 Merge branch 'dev3' into dev3-bin 2025-01-03 08:38:45 -08:00
Daan Leijen
2a75500ac2 disable large pages by default 2025-01-03 08:38:36 -08:00
Daan Leijen
4e43ebb496 Merge branch 'dev' into dev3 2025-01-03 08:17:44 -08:00
Daan Leijen
53873df613 Merge branch 'dev3' into dev3-bin 2025-01-02 17:25:49 -08:00
Daan Leijen
211f11218e merge from dev 2025-01-02 17:25:38 -08:00
Daan Leijen
9363900f75 Merge branch 'dev3-bin' of e:\dev\mimalloc3 into dev3-bin 2025-01-02 15:21:43 -08:00
daanx
ab78d57a84 search size bins from small to large 2025-01-02 15:19:08 -08:00
daanx
d25f714ff5 merge from dev3 2025-01-02 15:06:31 -08:00
daanx
d242e86e74 Merge branch 'dev' into dev3 2025-01-02 15:02:57 -08:00
daanx
34e402e128 fix NX test in try_find_and_clearN 2025-01-02 15:00:17 -08:00
daanx
10b40f90fc fix scan of NX 2025-01-02 14:59:42 -08:00
Daan Leijen
44264b3d8b Merge branch 'dev3-bin' of e:\dev\mimalloc3 into dev3-bin 2025-01-02 12:45:38 -08:00
daanx
670ebd0348 merge from dev3; make medium bin larger than other 2025-01-02 12:24:27 -08:00
daanx
5e26ba6fe6 fix debug output 2025-01-02 12:14:12 -08:00
daanx
3933ac9a3f merge from dev3 2025-01-02 11:54:26 -08:00
daanx
c507ee3d96 make bitmap scan cross bfields for NX; disable the use of large object pages 2025-01-02 11:42:28 -08:00
daanx
ff52ea0553 Merge branch 'dev3' into dev3-bin 2024-12-31 15:11:24 -08:00
daanx
0d302cd174 add comments 2024-12-31 15:11:09 -08:00
Daan Leijen
84f2038a2c Merge branch 'dev3' into dev3-bin 2024-12-31 14:28:25 -08:00
Daan Leijen
9665d604d3 merge from dev 2024-12-31 14:28:09 -08:00
Daan
9511d09529 add neon version for chunk all_set 2024-12-26 23:51:37 -08:00
Daan
dddcd5de16 add neon version for chunk_is_clear 2024-12-26 23:49:38 -08:00
Daan
82a8b2445e Merge branch 'dev3' into dev3-bin 2024-12-26 23:12:11 -08:00
Daan
8a4c26377f add neon code for bit clear 2024-12-26 23:12:03 -08:00
Daan
c9ab24899c Merge branch 'dev3' into dev3-bin 2024-12-26 11:19:32 -08:00
Daan
e6d9011b9d Merge branch 'dev' into dev3 2024-12-26 11:19:04 -08:00
daanx
e359e9b12b merge from dev3 2024-12-26 10:43:10 -08:00
daanx
fb704834c4 Merge branch 'dev3' into dev3-bin 2024-12-26 10:42:35 -08:00
daanx
0a7fd7eb6f use fixed tls on windows with static linking 2024-12-26 10:42:24 -08:00
daanx
807b5cd342 Merge branch 'dev3' into dev3-bin 2024-12-26 10:38:02 -08:00
daanx
8b6eb4752b merge from dev, add decl_hidden for better codegen on page_map loading 2024-12-26 10:37:51 -08:00
daanx
f72ac7a5aa add attr_noexept for better codegen on msvc 2024-12-26 10:28:36 -08:00
daanx
4c5bc125ab Merge branch 'dev3' into dev3-bin 2024-12-26 10:25:03 -08:00
daanx
b70fd1093a merge from dev 2024-12-26 10:24:56 -08:00
daanx
2aad74e0c3 Merge branch 'dev3' into dev3-bin 2024-12-26 10:15:38 -08:00
daanx
bec06cfb95 merge from dev 2024-12-26 10:15:08 -08:00
daanx
27e0c467ae fix c++ initializer warning 2024-12-25 14:56:11 -08:00
Daan Leijen
76d50d4566 Merge branch 'dev3' into dev3-bin 2024-12-25 14:41:43 -08:00
Daan Leijen
efe10513ec fix initializer warning on clang-18 2024-12-25 14:40:32 -08:00
daanx
a245135d89 Merge branch 'dev3' into dev3-bin 2024-12-25 14:12:52 -08:00
daanx
5f13941c18 fix constructor re-initialization on subproc_main 2024-12-25 14:12:45 -08:00
daanx
c65d5b878b Merge branch 'dev3' into dev3-bin 2024-12-25 13:30:50 -08:00
daanx
7ae726bb39 small fixes 2024-12-25 13:30:42 -08:00
daanx
b5c4a3c6e7 merge from dev3 2024-12-25 11:47:54 -08:00
daanx
8339cefdeb fix stats for delay purge commit 2024-12-25 11:45:01 -08:00
daanx
15061be4b2 commit page-map within one allocation 2024-12-25 10:50:49 -08:00
daanx
ce7eb4db7a fix page commit-on-demand setting 2024-12-25 10:49:49 -08:00
daanx
5a663da9aa fix build warning 2024-12-24 20:38:36 -08:00
daanx
e64d6fcc47 Merge branch 'dev3' into dev3-bin 2024-12-24 20:23:47 -08:00
daanx
24b8384f80 remove is_expandable requirement on page candidates 2024-12-24 20:23:37 -08:00
daanx
a65742fdf9 merge from dev3 2024-12-24 20:21:56 -08:00
daanx
8259c0eb7c nice colors for heap maps 2024-12-24 20:10:44 -08:00
daanx
50d22cf092 Merge branch 'dev3' into dev3-bin 2024-12-24 17:15:01 -08:00
daanx
4d1d3471cf rename page options 2024-12-24 17:14:53 -08:00
daanx
fe8e52cbcc Merge branch 'dev3' into dev3-bin 2024-12-24 17:07:19 -08:00
daanx
1e1a12bf3c fix rounding issue with huge size allocations 2024-12-24 17:07:11 -08:00
daanx
6f6190c8a9 Merge branch 'dev3' into dev3-bin 2024-12-24 16:40:02 -08:00
daanx
d862e57955 fix huge page allocation size 2024-12-24 16:39:54 -08:00
daanx
e078879825 Merge branch 'dev3' into dev3-bin 2024-12-24 15:00:14 -08:00
daanx
ad6f48f3e4 fix assertion for huge pages 2024-12-24 15:00:05 -08:00
Daan Leijen
431370df62 Merge branch 'dev3' into dev3-bin 2024-12-24 12:10:46 -08:00
Daan Leijen
016b36d917 fix max va bits on unix 2024-12-24 12:10:34 -08:00
Daan Leijen
71a1645d4d fix build 2024-12-24 12:04:21 -08:00
daanx
7c331a967b merge from dev3 2024-12-24 11:42:02 -08:00
daanx
d21114b5f2 improve page commit on demand 2024-12-24 11:37:52 -08:00
daanx
ba68810333 commit page on demand 2024-12-23 18:33:37 -08:00
daanx
9a7c0d443a max obj size 1/8 of a page 2024-12-23 17:15:13 -08:00
daanx
b77b34df96 double arena per 4; large page objects 1/8 of large page size 2024-12-23 17:10:34 -08:00
daanx
3fa3476712 Merge branch 'dev3' into dev3-bin 2024-12-23 16:47:08 -08:00
daanx
9bad269c51 fix purge delay check for arenas 2024-12-23 16:47:01 -08:00
daanx
c65c6d83bd fix guard page size 2024-12-23 16:31:42 -08:00
daanx
b515a0ad4c add _mi_os_guard_page_size 2024-12-23 16:28:34 -08:00
daanx
88d8ee964f remove is_large member (and use is_pinned for this) 2024-12-23 15:04:06 -08:00
daanx
657135de36 commit 2level page-map on over-commit systems 2024-12-23 09:53:52 -08:00
daanx
da2ab86e9f Merge branch 'dev3' into dev3-bin 2024-12-22 22:31:26 -08:00
daanx
bc5ae31649 add abandoned_visit_blocks 2024-12-22 22:31:16 -08:00
daanx
04970f43e5 document way to use a TLS slot on windows 2024-12-22 21:55:40 -08:00
daanx
dd1b37c9f8 fix recursive tls access on macOS <= 14 2024-12-22 21:03:03 -08:00
daanx
8d2b7b0383 merge from dev3 2024-12-22 18:34:39 -08:00
daanx
36bf7dfc45 Merge branch 'dev3' into dev3-bin 2024-12-22 18:33:56 -08:00
daanx
f605cb73e5 old purge delay 2024-12-22 18:33:44 -08:00
daanx
823f5b7ecd merge from dev3 2024-12-22 18:32:47 -08:00
daanx
e61ab67185 cleanup 2024-12-22 18:31:33 -08:00
daanx
1eea4309b6 Merge branch 'dev3' into dev3-bin 2024-12-22 18:09:27 -08:00
daanx
db82baf1a8 cleanup, some renaming 2024-12-22 18:09:16 -08:00
daanx
9ecadaecd5 clean up 2024-12-22 17:55:56 -08:00
daanx
b920fc1b72 merge from dev3 2024-12-22 17:38:48 -08:00
daanx
773fe7ae5b support full secure build 2024-12-22 17:25:58 -08:00
daanx
516e644359 rename option pagemap_commit; always commit the page map on macos (for now) 2024-12-22 16:06:49 -08:00
daanx
6b97830f6a merge from dev3 2024-12-22 14:40:46 -08:00
daanx
c5cfc92f0c small fixes 2024-12-22 14:39:57 -08:00
daanx
a42a2a926b improving level 2 page-map 2024-12-22 14:18:33 -08:00
daanx
3c7d7e1f11 experiment with 2 level pagemap 2024-12-22 14:07:57 -08:00
daanx
8d16303aa6 add -mtune=native with opt arch 2024-12-22 12:21:31 -08:00
daanx
93fa8d895a revert back to flat address map 2024-12-22 12:18:53 -08:00
daanx
c9b2d31665 fix page_map initialization 2024-12-21 23:17:11 -08:00
daanx
56cbddfc7e initial work on a two-level page-map 2024-12-21 23:08:52 -08:00
daanx
1e2221f512 fix signed/unsigned; fix heap_destroy assert failure 2024-12-21 19:28:53 -08:00
daanx
bfc498e54a Merge branch 'dev3' into dev3-bin 2024-12-21 16:25:04 -08:00
daanx
d7d626cbfa enable collecting from the full page queue 2024-12-21 16:24:56 -08:00
daanx
b991510813 merge from dev3 2024-12-21 15:56:22 -08:00
daanx
da17a59bdb re-add deferred free and heap retired collect 2024-12-21 15:53:50 -08:00
daanx
5de5550c63 merge from dev3 2024-12-21 15:52:15 -08:00
daanx
c138fba149 merge from dev 2024-12-21 15:49:17 -08:00
daanx
1a6fbdf0b2 merge from dev 2024-12-21 15:48:49 -08:00
daanx
108c84e858 remove req_arena parameter to arena_reserve 2024-12-21 14:45:14 -08:00
daanx
7d46478a5f add initial load/unload for heaps 2024-12-21 13:19:06 -08:00
daanx
89b0d5a357 allocate heaps associated with an arena in that arena 2024-12-21 11:53:29 -08:00
daanx
4ad7fedd25 track os abandoned pages in a list 2024-12-21 11:35:30 -08:00
daanx
95aeda4cdd merge subproc stats on delete 2024-12-21 10:53:34 -08:00
daanx
dece8a587b make stats part of a subproc 2024-12-21 10:43:08 -08:00
daanx
daac75af36 fix lock recursion 2024-12-20 22:13:58 -08:00
daanx
a5b7d7f264 subprocesses own arena's 2024-12-20 21:38:31 -08:00
daanx
53857ddaa3 Merge branch 'dev' into dev3 2024-12-20 17:32:32 -08:00
daanx
7141d9f164 remove busy wait for arena reservation 2024-12-20 17:31:48 -08:00
daanx
bc459b5e16 Merge branch 'dev3' of https://github.com/microsoft/mimalloc into dev3 2024-12-20 16:46:18 -08:00
Daan Leijen
278f1ff556 merge from dev; match test-stress 2024-12-20 14:00:02 -08:00
daanx
b2d1b4c472 Merge branch 'dev3-bin' of https://github.com/microsoft/mimalloc into dev3-bin 2024-12-20 13:10:55 -08:00
daanx
efa82e1c7d Merge branch 'dev3' of https://github.com/microsoft/mimalloc into dev3 2024-12-20 13:10:16 -08:00
Daan Leijen
f0f4c9c009 Merge branch 'dev3' into dev3-bin 2024-12-20 13:07:00 -08:00
Daan Leijen
7822438561 merge from dev 2024-12-20 13:06:46 -08:00
Daan Leijen
4322546a9b Merge branch 'dev3' into dev3-bin 2024-12-20 13:01:09 -08:00
Daan Leijen
f6408235f7 merge from dev 2024-12-20 13:01:00 -08:00
Daan Leijen
13a58ac343 Merge branch 'dev3' into dev3-bin 2024-12-20 11:56:16 -08:00
Daan Leijen
5614c5052e don't prefer high used candidate if it is too full 2024-12-20 11:56:04 -08:00
Daan Leijen
2db407d1e9 revert back to generating mimalloc.dll instead of mimalloc-override.dll 2024-12-20 11:54:39 -08:00
daanx
3746bf79ed small fixes; max object size 1/8th of a pages 2024-12-19 21:30:03 -08:00
daanx
9a4c264e76 Merge branch 'dev3' into dev3-bin 2024-12-19 19:18:10 -08:00
daanx
de8001c107 add specialized is_set for 1 bit 2024-12-19 19:18:04 -08:00
daanx
8dd605099b fix arm64ec asm 2024-12-19 15:29:40 -08:00
daanx
02b59e0f15 Merge branch 'dev3' into dev3-bin 2024-12-19 11:01:12 -08:00
daanx
b18e1546a7 merge from dev 2024-12-18 15:59:33 -08:00
daanx
2d679959b7 Merge branch 'dev3' into dev3-bin 2024-12-17 19:13:14 -08:00
daanx
264d5a6704 update stat adjustment for purging 2024-12-17 19:13:03 -08:00
daanx
fb90938408 adjust stats more clearly to avoid double counting commits 2024-12-17 19:11:23 -08:00
daanx
2a3969ffc7 Merge branch 'dev3' into dev3-bin 2024-12-17 18:57:20 -08:00
Daan Leijen
58b726be6f better stats for commit on overcommit systems (by not counting on-demand commit upfront) 2024-12-17 18:57:00 -08:00
daanx
587eabe72b Merge branch 'dev3' into dev3-bin 2024-12-17 18:10:37 -08:00
daanx
84bb1c2712 adjust stats more clearly to avoid double counting commits 2024-12-17 18:10:28 -08:00
daanx
21c05019b7 Merge branch 'dev' into dev3 2024-12-17 17:54:24 -08:00
daanx
34d03f3981 atomically clear purge bits when visiting 2024-12-17 12:32:18 -08:00
daanx
6e2a64b81e merge from dev3 2024-12-17 11:58:02 -08:00
daanx
c585753dce fix purging with ranges 2024-12-17 11:54:26 -08:00
daanx
68a90ceb9a add ranges for purging 2024-12-17 11:44:14 -08:00
daanx
adfeb1f6f2 fix bug in bitmap_forall_ranges 2024-12-17 10:43:31 -08:00
daanx
fdad1a0d4f fix infoslices needed calculation 2024-12-17 09:49:09 -08:00
Daan Leijen
98171fd80a testing on arm64 2024-12-17 00:24:32 -08:00
Daan Leijen
d4a2813ff8 Merge branch 'dev3' into dev3-bin 2024-12-17 00:17:32 -08:00
Daan Leijen
63d0c8f861 merge from dev 2024-12-17 00:14:03 -08:00
daanx
d9397be178 comments 2024-12-16 10:00:32 -08:00
daanx
037cb167f8 comments 2024-12-16 09:51:54 -08:00
daanx
d2f670e6e5 add delay to purg'ing; call collect_retired every N generic allocs 2024-12-15 19:54:01 -08:00
daanx
3330d4353a remove maxaccessed from general bitmaps 2024-12-15 19:15:00 -08:00
daanx
e24217e69c more bbin size classes, bug fixes 2024-12-15 18:35:12 -08:00
daanx
df9009a060 wip: binned bitmap for the free slices 2024-12-15 17:15:56 -08:00
daanx
3153e5a4c5 small fixes 2024-12-15 13:47:33 -08:00
daanx
13ee94cef6 fix concurrent mi_tld access bug 2024-12-15 13:22:00 -08:00
daanx
4aeb2e1005 flexible clearN_ that can start at any index 2024-12-15 13:21:13 -08:00
daanx
b5dfd233e9 fix avx2 bug with atomics 2024-12-13 19:59:08 -08:00
daanx
216c04f8d9 clean up bitmap api 2024-12-13 18:39:03 -08:00
daanx
4c81c3cf90 enable purging of free committed slices from arenas 2024-12-13 13:17:00 -08:00
daanx
42af184ce9 wip: start on purge 2024-12-13 09:04:23 -08:00
daanx
ba39e4d65b wip: start on purge 2024-12-13 09:03:17 -08:00
Daan
3010d5890f fix assertion 2024-12-12 20:27:46 -08:00
daanx
e43eb1f191 nicer debug output 2024-12-12 20:22:24 -08:00
daanx
b53ac835f1 comment 2024-12-12 20:01:37 -08:00
daanx
623eaedf33 add debug output for page map; free tld on thread exit 2024-12-12 19:59:54 -08:00
daanx
637de624b3 fix free bug for meta data 2024-12-12 19:55:45 -08:00
daanx
d5c4a16e58 lower full page retain more aggressively in a threadpool 2024-12-12 17:57:36 -08:00
daanx
df956c4a17 use thread spacing for reclaim as well 2024-12-12 17:22:41 -08:00
daanx
98879ac8bc use thread spacing for reclaim as well 2024-12-12 17:22:00 -08:00
daanx
118bd8c97f space out threads when searching for free pages 2024-12-12 16:37:31 -08:00
daanx
94ce342ea9 maintain pages set for arenas; improve arena load/unload 2024-12-11 22:06:25 -08:00
daanx
aed76f2910 wip: allow arena (re)loading 2024-12-11 20:34:23 -08:00
daanx
ccf5e36e6b use frac 8 for reclaim_on_free and reabandon; halve full_page_retain if running in a threadpool 2024-12-11 16:26:39 -08:00
daanx
1c8d15abac fix build error 2024-12-11 14:30:44 -08:00
daanx
ab53a73cbd small updates 2024-12-11 14:29:06 -08:00
daanx
565656919e fix comments in types; fix guarded alignment bug 2024-12-11 13:04:37 -08:00
daanx
64eea823e4 use always abandon on heap delete 2024-12-11 09:24:38 -08:00
daanx
24d3c1bc14 heap meta data always uses mi_meta_zalloc 2024-12-11 09:16:28 -08:00
daanx
6774130c9a Merge ..\mimalloc into dev3 2024-12-10 20:46:12 -08:00
daanx
64c4181ffa better block alignment 2024-12-10 20:32:48 -08:00
daanx
c478ddaab4 fix MI_GUARDED build 2024-12-10 19:44:54 -08:00
daanx
2a1c346281 Merge branch 'dev3' of https://github.com/microsoft/mimalloc into dev3 2024-12-10 15:12:13 -08:00
Daan
13be5d6740 use non-null tld in heap_init 2024-12-10 15:11:46 -08:00
daanx
7cd8f31f30 improve popcount 2024-12-10 14:50:55 -08:00
Daan
f37aff6ee2 fix for macOS 14 and earlier 2024-12-09 22:27:40 -08:00
Daan
6798375f47 temporarily add macOS 13 and 12 for testing 2024-12-09 21:26:23 -08:00
Daan
5e434a6e66 merge from dev 2024-12-09 21:24:30 -08:00
daanx
c5a2d11193 add extra checks for valid pointers in the pagemap, add max_vabits and debug_commit_full_pagemap options 2024-12-09 20:40:26 -08:00
daanx
3a92c35270 improve generic ctz/clz 2024-12-09 20:25:22 -08:00
daanx
e44815ed6f add bsf/bsr for compilation with older compilers (clang 7) 2024-12-09 20:06:48 -08:00
daanx
56a1bd7f9e fix 32 bit multiply in generic ctz/clz 2024-12-09 19:43:00 -08:00
daanx
f28d5c7029 add cast to avoid errors on clang 7 2024-12-09 19:12:03 -08:00
daanx
bbcbd3cd1f add cast to avoid errors on clang 7 2024-12-09 19:06:06 -08:00
Daan
3f732a981f fix debug build of MI_GUARDED 2024-12-09 15:49:20 -08:00
Daan
8f5449d271 various fixes for test pipeline 2024-12-09 15:39:15 -08:00
Daan
351cb0c740 small fixes for macOS 2024-12-09 15:16:36 -08:00
daanx
d5ed0cc71e various improvements 2024-12-09 14:31:43 -08:00
daanx
68ac94c1ba set default arena reserve back to 1GiB 2024-12-08 18:53:43 -08:00
daanx
bf2f2a8bf4 fix bug where only the first chunkmap field would be considered 2024-12-08 18:48:56 -08:00
daanx
88990cec2d merge from dev 2024-12-08 18:27:05 -08:00
daanx
2a4af6f169 comments 2024-12-08 17:21:17 -08:00
daanx
2084df3dde add dedicated meta data allocation for threads and tld 2024-12-08 12:20:54 -08:00
daanx
67cc424ada delete old files 2024-12-08 09:19:05 -08:00
daanx
36bb599873 merge from dev 2024-12-08 09:15:09 -08:00
daanx
2ed6e03d27 update optimization on haswell 2024-12-08 09:14:16 -08:00
daanx
e446bc27e5 Merge ..\mimalloc into dev3 2024-12-08 09:03:33 -08:00
daanx
5a06d2aeba update bit primitives 2024-12-08 09:03:25 -08:00
daanx
c33de86da3 check for running in a threadpool to disable page reclaim 2024-12-07 17:11:11 -08:00
daanx
d0c86f3f0e specialize bitmap operations for common page sizes 2024-12-07 16:26:07 -08:00
daanx
bf42759d97 check heaptag on abandonded page allocation 2024-12-07 15:13:17 -08:00
daanx
6b52b19e3b arch specific optimizations 2024-12-07 15:02:27 -08:00
daanx
0e5d5831e4 Merge ..\mimalloc into dev3 2024-12-07 14:17:05 -08:00
daanx
bef52b96f6 Merge ../mimalloc into dev3 2024-12-07 14:04:02 -08:00
daanx
9631b0d4d2 revise visiting arenas, better bitmap scanning 2024-12-07 14:03:51 -08:00
daanx
70115d8b8c small fixes 2024-12-06 23:25:53 -08:00
daanx
bf9a2ddb59 compile for 32-bit as well 2024-12-06 23:07:10 -08:00
daanx
659a9dd51d fix page info size and order; atomic page flags 2024-12-06 22:37:59 -08:00
daanx
5a5943ad33 record max_clear bit 2024-12-06 21:03:33 -08:00
daanx
61436a92b9 working simplified version without pairmaps and bitmap epoch 2024-12-06 15:26:01 -08:00
daanx
ec9c61c066 initial no more pairmap 2024-12-06 14:53:24 -08:00
daanx
7443ee317e tune free-ing and abandoning 2024-12-05 17:00:23 -08:00
daanx
0616ee151e change to full_page_retain 2024-12-05 11:29:25 -08:00
daanx
bc67be4d79 small adjustments 2024-12-04 21:40:57 -08:00
daanx
afe9089152 more documentation; better pairmap find_and_set_to_busy, busy flag is now 0x10 2024-12-04 19:15:55 -08:00
daanx
45f7fb559a small fixes 2024-12-04 00:14:56 -08:00
daanx
bc7fe399b1 large bitmaps working; lock on arena_reserve 2024-12-03 23:35:33 -08:00
daanx
e5fdd6e110 wip: initial large bitmaps 2024-12-03 22:43:14 -08:00
daanx
8d9c725482 increase MAX_OBJ_SLICES to a full chunk (32MiB) 2024-12-03 17:27:43 -08:00
daanx
3fc2c8e279 fix assertions 2024-12-03 11:06:07 -08:00
daanx
666c089fc8 revise free reclaim; ensure unown cannot race with a free 2024-12-03 10:51:13 -08:00
daanx
833b091ff9 can run the full test suite 2024-12-02 20:25:44 -08:00
daanx
bd5f7de3f4 can run basic test 2024-12-02 20:21:35 -08:00
daanx
fe5a314114 add base and size to OS memid 2024-12-02 19:31:36 -08:00
daanx
5e95ebc7a0 fix free stats 2024-12-02 17:46:41 -08:00
daanx
c9abfe8253 wip: can run mstress 2024-12-02 16:24:40 -08:00
daanx
d96c134566 wip: initial version with eager abandonment 2024-12-02 16:01:45 -08:00
daanx
69ac69abac wip: use epoch with 512bit chunks 2024-12-02 00:31:08 -08:00
daanx
2f789aae9a wip: cannot compile 2024-12-01 16:26:59 -08:00
daanx
1d7a9f62a5 bug fixes 2024-12-01 12:54:16 -08:00
daanx
8f2a5864b8 pass all debug tests 2024-11-30 22:54:57 -08:00
daanx
9ebe941ce0 first version that passes the make test 2024-11-30 20:21:32 -08:00
daanx
55b70f1588 wip 2024-11-30 14:00:07 -08:00
daanx
f8d04dc2bc compile with clang and gcc 2024-11-30 12:41:11 -08:00
daanx
d15e83030e wip: rename arena blocks to slices 2024-11-30 12:16:41 -08:00
daanx
309fc26b4b wip: add generic find_and_xset 2024-11-30 12:00:30 -08:00
daanx
188294a0df wip: bug fixes 2024-11-30 11:12:39 -08:00
daanx
9d904e8643 wip: bug fixes 2024-11-30 10:39:30 -08:00
daanx
978d844e15 wip: bug fixes 2024-11-29 20:23:39 -08:00
daanx
0f635413d6 wip: can run initial test 2024-11-29 17:50:37 -08:00
daanx
e0152ab82f wip: update any_set 2024-11-29 16:58:52 -08:00
daanx
9603fe8b50 can compile without missing functions 2024-11-29 16:27:58 -08:00
daanx
68f5fb2f4b wip: further progress on segment removal; arena allocation 2024-11-29 15:08:06 -08:00
daanx
46afcbe06c wip: further progress on segment removal; arena allocation 2024-11-29 14:28:34 -08:00
daanx
441d4fed9f wip: further progress on removing segments 2024-11-29 10:40:18 -08:00
daanx
71cfa45e76 wip: initial work on mimalloc3 without segments 2024-11-28 19:31:04 -08:00
44 changed files with 6707 additions and 5012 deletions


@ -10,25 +10,30 @@ option(MI_PADDING "Enable padding to detect heap block overflow (alway
option(MI_OVERRIDE "Override the standard malloc interface (i.e. define entry points for 'malloc', 'free', etc)" ON)
option(MI_XMALLOC "Enable abort() call on memory allocation failure by default" OFF)
option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF)
option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF)
option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF)
option(MI_TRACK_ETW "Compile with Windows event tracing (ETW) support (adds a small overhead)" OFF)
option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF)
option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for arm64: '-march=armv8.1-a' (2016))" ON)
option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for x64: '-march=haswell;-mavx2' (2013), for arm64: '-march=armv8.1-a' (2016))" ON)
option(MI_OPT_SIMD "Use SIMD instructions (requires MI_OPT_ARCH to be enabled)" OFF)
option(MI_SEE_ASM "Generate assembly files" OFF)
option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON)
option(MI_WIN_REDIRECT "Use redirection module ('mimalloc-redirect') on Windows if compiling mimalloc as a DLL" ON)
option(MI_WIN_USE_FIXED_TLS "Use a fixed TLS slot on Windows to avoid extra tests in the malloc fast path" OFF)
option(MI_LOCAL_DYNAMIC_TLS "Use local-dynamic-tls, a slightly slower but dlopen-compatible thread local storage mechanism (Unix)" OFF)
option(MI_LIBC_MUSL "Set this when linking with musl libc" OFF)
option(MI_LIBC_MUSL "Enable this when linking with musl libc" OFF)
option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF)
option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF)
option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF)
option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF)
option(MI_TRACK_ETW "Compile with Windows event tracing (ETW) support (adds a small overhead)" OFF)
option(MI_BUILD_SHARED "Build shared library" ON)
option(MI_BUILD_STATIC "Build static library" ON)
option(MI_BUILD_OBJECT "Build object library" ON)
option(MI_BUILD_TESTS "Build test executables" ON)
option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF)
option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF)
option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
option(MI_SKIP_COLLECT_ON_EXIT "Skip collecting memory on program exit" OFF)
option(MI_NO_PADDING "Force no use of padding even in DEBUG mode etc." OFF)
option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version" OFF)
@ -50,6 +55,7 @@ set(mi_sources
src/alloc-aligned.c
src/alloc-posix.c
src/arena.c
src/arena-meta.c
src/bitmap.c
src/heap.c
src/init.c
@ -57,9 +63,8 @@ set(mi_sources
src/options.c
src/os.c
src/page.c
src/page-map.c
src/random.c
src/segment.c
src/segment-map.c
src/stats.c
src/prim/prim.c)
@ -122,8 +127,8 @@ if(CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo")
if (NOT MI_OPT_ARCH)
message(STATUS "Architecture specific optimizations are disabled (MI_OPT_ARCH=OFF)")
endif()
else()
set(MI_OPT_ARCH OFF)
#else()
# set(MI_OPT_ARCH OFF)
endif()
if(MI_OVERRIDE)
@ -227,7 +232,7 @@ endif()
if(MI_SEE_ASM)
message(STATUS "Generate assembly listings (MI_SEE_ASM=ON)")
list(APPEND mi_cflags -save-temps)
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang")
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER 14)
message(STATUS "No GNU Line marker")
list(APPEND mi_cflags -Wno-gnu-line-marker)
endif()
@ -398,21 +403,28 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM
list(APPEND mi_cflags -ftls-model=initial-exec)
endif()
endif()
endif()
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel")
if(MI_OVERRIDE)
list(APPEND mi_cflags -fno-builtin-malloc)
endif()
if(MI_OPT_ARCH)
if(MI_ARCH STREQUAL "arm64")
set(MI_OPT_ARCH_FLAGS "-march=armv8.1-a") # fast atomics
if(MI_ARCH STREQUAL "x64")
set(MI_OPT_ARCH_FLAGS "-march=haswell;-mavx2;-mtune=native") # fast bit scan (since 2013)
elseif(MI_ARCH STREQUAL "arm64")
set(MI_OPT_ARCH_FLAGS "-march=armv8.1-a;-mtune=native") # fast atomics (since 2016)
endif()
endif()
endif()
if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914)
if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914) # vs2017+
list(APPEND mi_cflags /Zc:__cplusplus)
if(MI_OPT_ARCH)
if(MI_ARCH STREQUAL "arm64")
set(MI_OPT_ARCH_FLAGS "/arch:armv8.1") # fast atomics
if(MI_ARCH STREQUAL "x64")
set(MI_OPT_ARCH_FLAGS "/arch:AVX2")
elseif(MI_ARCH STREQUAL "arm64")
set(MI_OPT_ARCH_FLAGS "/arch:armv8.1")
endif()
endif()
endif()
@ -424,6 +436,12 @@ endif()
if(MI_OPT_ARCH_FLAGS)
list(APPEND mi_cflags ${MI_OPT_ARCH_FLAGS})
message(STATUS "Architecture specific optimization is enabled (with ${MI_OPT_ARCH_FLAGS}) (MI_OPT_ARCH=ON)")
if (MI_OPT_SIMD)
list(APPEND mi_defines "MI_OPT_SIMD=1")
message(STATUS "SIMD instructions are enabled (MI_OPT_SIMD=ON)")
endif()
elseif(MI_OPT_SIMD)
message(STATUS "SIMD instructions are not enabled (either MI_OPT_ARCH=OFF or this architecture has no SIMD support)")
endif()
# extra needed libraries


@ -306,3 +306,28 @@ jobs:
- script: ctest --verbose --timeout 240
workingDirectory: $(BuildType)
displayName: CTest
- job:
displayName: macOS 13 (Ventura)
pool:
vmImage:
macOS-13
strategy:
matrix:
Debug:
BuildType: debug
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
Release:
BuildType: release
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
steps:
- task: CMake@1
inputs:
workingDirectory: $(BuildType)
cmakeArgs: .. $(cmakeExtraArgs)
- script: make -j$(sysctl -n hw.ncpu) -C $(BuildType)
displayName: Make
- script: ctest --verbose --timeout 180
workingDirectory: $(BuildType)
displayName: CTest


@ -1,6 +1,6 @@
set(mi_version_major 1)
set(mi_version_minor 8)
set(mi_version_patch 8)
set(mi_version_major 3)
set(mi_version_minor 0)
set(mi_version_patch 0)
set(mi_version ${mi_version_major}.${mi_version_minor})
set(PACKAGE_VERSION ${mi_version})


@ -431,12 +431,11 @@ int mi_reserve_os_memory(size_t size, bool commit, bool allow_large);
/// @param start Start of the memory area
/// @param size The size of the memory area.
/// @param is_committed Is the area already committed?
/// @param is_large Does it consist of large OS pages? Set this to \a true as well for memory
/// that should not be decommitted or protected (like rdma etc.)
/// @param is_pinned Can the memory not be decommitted or reset? (usually the case for large OS pages)
/// @param is_zero Does the area consists of zero's?
/// @param numa_node Possible associated numa node or `-1`.
/// @return \a true if successful, and \a false on error.
bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node);
bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node);
/// Reserve \a pages of huge OS pages (1GiB) evenly divided over \a numa_nodes nodes,
/// but stops after at most `timeout_msecs` seconds.
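As context for the `is_pinned` rename above, here is a minimal usage sketch (not taken from the repository) that hands a pre-mapped region to mimalloc; the POSIX `mmap` setup and the 64 MiB region size are illustrative assumptions.

// Sketch only: give mimalloc a caller-managed OS region using the renamed
// `is_pinned` parameter. Assumes a POSIX system; error handling kept minimal.
#include <sys/mman.h>
#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  const size_t size = 64 * 1024 * 1024;   // 64 MiB, an arbitrary example size
  void* start = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (start == MAP_FAILED) return 1;
  // committed and zeroed by mmap; not pinned, so mimalloc may decommit/reset it
  if (!mi_manage_os_memory(start, size, /*is_committed*/ true, /*is_pinned*/ false,
                           /*is_zero*/ true, /*numa_node*/ -1)) {
    fprintf(stderr, "mi_manage_os_memory failed\n");
    return 1;
  }
  void* p = mi_malloc(1024);   // may now be served from the managed area
  mi_free(p);
  return 0;
}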


@ -308,6 +308,7 @@
<CompileAs>CompileAsCpp</CompileAs>
<IntrinsicFunctions>true</IntrinsicFunctions>
<LanguageStandard>stdcpp20</LanguageStandard>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -421,16 +422,7 @@
</ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c" />
<ClCompile Include="..\..\src\alloc.c" />
<ClCompile Include="..\..\src\arena-abandoned.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64EC'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\arena-meta.c" />
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\bitmap.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
@ -450,6 +442,7 @@
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\libc.c" />
<ClCompile Include="..\..\src\page-map.c" />
<ClCompile Include="..\..\src\prim\prim.c" />
<ClCompile Include="..\..\src\prim\windows\prim.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
@ -474,8 +467,6 @@
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment-map.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\os.c" />
<ClCompile Include="..\..\src\stats.c" />
</ItemGroup>
@ -484,6 +475,7 @@
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc-override.h" />
<ClInclude Include="..\..\include\mimalloc-new-delete.h" />
<ClInclude Include="..\..\include\mimalloc\atomic.h" />
<ClInclude Include="..\..\include\mimalloc\bits.h" />
<ClInclude Include="..\..\include\mimalloc\internal.h" />
<ClInclude Include="..\..\include\mimalloc\prim.h" />
<ClInclude Include="..\..\include\mimalloc\track.h" />


@ -16,9 +16,6 @@
<ClCompile Include="..\..\src\arena.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena-abandoned.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\bitmap.c">
<Filter>Sources</Filter>
</ClCompile>
@ -55,15 +52,15 @@
<ClCompile Include="..\..\src\random.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment-map.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\stats.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\page-map.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena-meta.c">
<Filter>Sources</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\include\mimalloc\atomic.h">
@ -93,6 +90,9 @@
<ClInclude Include="..\..\include\mimalloc\prim.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc\bits.h">
<Filter>Headers</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="Headers">


@ -404,11 +404,10 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h" />
<ClInclude Include="..\..\include\mimalloc-etw-gen.h" />
<ClInclude Include="..\..\include\mimalloc-etw.h" />
<ClInclude Include="..\..\include\mimalloc-new-delete.h" />
<ClInclude Include="..\..\include\mimalloc-override.h" />
<ClInclude Include="..\..\include\mimalloc\atomic.h" />
<ClInclude Include="..\..\include\mimalloc\bits.h" />
<ClInclude Include="..\..\include\mimalloc\internal.h" />
<ClInclude Include="..\..\include\mimalloc\prim.h" />
<ClInclude Include="..\..\include\mimalloc\track.h" />
@ -438,7 +437,10 @@
</ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c" />
<ClCompile Include="..\..\src\alloc.c" />
<ClCompile Include="..\..\src\arena-abandoned.c">
<ClCompile Include="..\..\src\arena-meta.c" />
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\bitmap.c" />
<ClCompile Include="..\..\src\free.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
@ -448,11 +450,10 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\bitmap.c" />
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\libc.c" />
<ClCompile Include="..\..\src\page-map.c" />
<ClCompile Include="..\..\src\prim\prim.c" />
<ClCompile Include="..\..\src\prim\windows\prim.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
@ -478,13 +479,8 @@
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment-map.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\stats.c" />
</ItemGroup>
<ItemGroup>
<None Include="..\..\include\mimalloc-etw-gen.man" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>


@ -16,9 +16,6 @@
<ClCompile Include="..\..\src\arena.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena-abandoned.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\bitmap.c">
<Filter>Sources</Filter>
</ClCompile>
@ -52,15 +49,18 @@
<ClCompile Include="..\..\src\random.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment-map.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\stats.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\page-map.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\free.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena-meta.c">
<Filter>Sources</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\include\mimalloc\atomic.h">
@ -75,12 +75,6 @@
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-etw.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-etw-gen.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-new-delete.h">
<Filter>Headers</Filter>
</ClInclude>
@ -96,6 +90,9 @@
<ClInclude Include="..\..\include\mimalloc\prim.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc\bits.h">
<Filter>Headers</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="Headers">
@ -105,9 +102,4 @@
<UniqueIdentifier>{94b40bdc-a741-45dd-81aa-c05fabcd2970}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<None Include="..\..\include\mimalloc-etw-gen.man">
<Filter>Sources</Filter>
</None>
</ItemGroup>
</Project>


@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_H
#define MIMALLOC_H
#define MI_MALLOC_VERSION 188 // major + 2 digits minor
#define MI_MALLOC_VERSION 300 // major + 2 digits minor
// ------------------------------------------------------
// Compiler specific attributes
@ -274,16 +274,16 @@ mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa
mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept;
mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_pinned /* cannot decommit/reset? */, bool is_zero, int numa_node) mi_attr_noexcept;
mi_decl_export void mi_debug_show_arenas(bool show_inuse) mi_attr_noexcept;
mi_decl_export void mi_debug_show_arenas(bool show_pages) mi_attr_noexcept;
// Experimental: heaps associated with specific memory arena's
typedef int mi_arena_id_t;
typedef void* mi_arena_id_t;
mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size);
mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
#if MI_MALLOC_VERSION >= 182
// Create a heap that only allocates in the specified arena
@ -317,6 +317,23 @@ mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t samp
mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max);
// experimental
//mi_decl_export void* mi_os_alloc(size_t size, bool commit, size_t* full_size);
//mi_decl_export void* mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, void** base, size_t* full_size);
//mi_decl_export void* mi_os_alloc_aligned_allow_large(size_t size, size_t alignment, bool commit, bool* is_committed, bool* is_pinned, void** base, size_t* full_size);
//mi_decl_export void mi_os_free(void* p, size_t size);
//mi_decl_export void mi_os_commit(void* p, size_t size);
//mi_decl_export void mi_os_decommit(void* p, size_t size);
mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* size);
mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* arena_id);
mi_decl_export bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena);
mi_decl_export void mi_heap_unload(mi_heap_t* heap);
// Is a pointer contained in the given arena area?
mi_decl_export bool mi_arena_contains(mi_arena_id_t arena_id, const void* p);
// ------------------------------------------------------
// Convenience
// ------------------------------------------------------
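The arena and heap load/unload entry points added above are not documented further in this diff, so the following is only a hypothetical call sequence inferred from the declarations; the ordering, the reservation size, and the assumption that a heap pointer stays usable across unload/reload are all unverified.

// Hypothetical sketch based only on the declarations above; not verified v3 behavior.
#include <mimalloc.h>

void demo_arena_unload_reload(void) {
  mi_arena_id_t arena_id;
  if (mi_reserve_os_memory_ex(64 * 1024 * 1024, /*commit*/ true, /*allow_large*/ false,
                              /*exclusive*/ true, &arena_id) != 0) return;
  mi_heap_t* heap = mi_heap_new_in_arena(arena_id);   // heap that allocates in this arena
  void* p = mi_heap_malloc(heap, 128);

  // Detach the heap and then the arena (assumed order).
  void* base; size_t accessed_size; size_t size;
  mi_heap_unload(heap);
  mi_arena_unload(arena_id, &base, &accessed_size, &size);

  // ... the [base, base+size) region could now be transported or persisted ...

  // Re-attach later; assumed that the old heap and its pointers become valid again.
  mi_arena_reload(base, size, &arena_id);
  mi_heap_reload(heap, arena_id);
  mi_free(p);
  mi_heap_delete(heap);
}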
@ -369,7 +386,6 @@ typedef enum mi_option_e {
mi_option_arena_reserve, // initial memory size for arena reservation (= 1 GiB on 64-bit) (internally, this value is in KiB; use `mi_option_get_size`)
mi_option_arena_purge_mult, // multiplier for `purge_delay` for the purging delay for arenas (=10)
mi_option_purge_extend_delay,
mi_option_abandoned_reclaim_on_free, // allow to reclaim an abandoned segment on a free (=1)
mi_option_disallow_arena_alloc, // 1 = do not use arena's for allocation (except if using specific arena id's)
mi_option_retry_on_oom, // retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. (only on windows)
mi_option_visit_abandoned, // allow visiting heap blocks from abandoned threads (=0)
@ -379,6 +395,12 @@ typedef enum mi_option_e {
mi_option_guarded_sample_rate, // 1 out of N allocations in the min/max range will be guarded (=1000)
mi_option_guarded_sample_seed, // can be set to allow for a (more) deterministic re-execution when a guard page is triggered (=0)
mi_option_target_segments_per_thread, // experimental (=0)
mi_option_reclaim_on_free, // allow to reclaim an abandoned segment on a free (=1)
mi_option_page_full_retain, // retain N full pages per size class (=2)
mi_option_page_max_candidates, // max candidate pages to consider for allocation (=4)
mi_option_max_vabits, // max user space virtual address bits to consider (=48)
mi_option_pagemap_commit, // commit the full pagemap (to always catch invalid pointer uses) (=0)
mi_option_page_commit_on_demand, // commit page memory on-demand
_mi_option_last,
// legacy option names
mi_option_large_os_pages = mi_option_allow_large_os_pages,
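The new v3 options above can be tuned through the existing `mi_option_set`/`mi_option_enable` interface (or the matching `MIMALLOC_...` environment variables); a small sketch follows, with the defaults taken from the enum comments and the chosen values purely illustrative.

// Sketch only: adjust some of the new options at startup, before heavy allocation.
#include <mimalloc.h>

static void configure_mimalloc_v3(void) {
  mi_option_set(mi_option_page_full_retain, 4);         // default: 2 full pages retained per size class
  mi_option_set(mi_option_page_max_candidates, 8);      // default: 4 candidate pages considered
  mi_option_enable(mi_option_page_commit_on_demand);    // commit page memory only when it is used
  mi_option_enable(mi_option_pagemap_commit);           // commit the full page map to catch invalid pointer uses
}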


@ -5,8 +5,8 @@ terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_ATOMIC_H
#define MIMALLOC_ATOMIC_H
#ifndef MI_ATOMIC_H
#define MI_ATOMIC_H
// include windows.h or pthreads.h
#if defined(_WIN32)
@ -75,16 +75,21 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_atomic_exchange_relaxed(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_cas_weak_relaxed(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(relaxed),mi_memory_order(relaxed))
#define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
#define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
#define mi_atomic_cas_strong_relaxed(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(relaxed),mi_memory_order(relaxed))
#define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_and_relaxed(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_or_relaxed(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,(uintptr_t)1)
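For the release/acquire compare-and-swap helpers added above, a minimal usage sketch (internal header, C11 path assumed; not code from the diff):

// Sketch only: monotonically raise a shared maximum with the weak CAS helper.
#include <stdatomic.h>
// assumes "mimalloc/atomic.h" has been included for the mi_atomic_* macros

static _Atomic(size_t) global_max;

static void update_max(size_t candidate) {
  size_t expected = mi_atomic_load_relaxed(&global_max);
  while (candidate > expected &&
         !mi_atomic_cas_weak_acq_rel(&global_max, &expected, candidate)) {
    // on failure the CAS reloads `expected`; loop until stored or no longer larger
  }
}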
@ -405,10 +410,9 @@ static inline void mi_atomic_yield(void) {
// ----------------------------------------------------------------------
// Locks
// These do not have to be recursive and should be light-weight
// in-process only locks. Only used for reserving arena's and to
// maintain the abandoned list.
// Locks
// These should be light-weight in-process only locks.
// Only used for reserving arena's and to maintain the abandoned list.
// ----------------------------------------------------------------------
#if _MSC_VER
#pragma warning(disable:26110) // unlock with holding lock
@ -534,4 +538,4 @@ static inline void mi_lock_done(mi_lock_t* lock) {
#endif
#endif // __MIMALLOC_ATOMIC_H
#endif // MI_ATOMIC_H

include/mimalloc/bits.h (new file, 336 lines)

@ -0,0 +1,336 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Bit operation, and platform dependent definition (MI_INTPTR_SIZE etc)
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITS_H
#define MI_BITS_H
// ------------------------------------------------------
// Size of a pointer.
// We assume that `sizeof(void*)==sizeof(intptr_t)`
// and it holds for all platforms we know of.
//
// However, the C standard only requires that:
// p == (void*)((intptr_t)p))
// but we also need:
// i == (intptr_t)((void*)i)
// or otherwise one might define an intptr_t type that is larger than a pointer...
// ------------------------------------------------------
#if INTPTR_MAX > INT64_MAX
# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example)
#elif INTPTR_MAX == INT64_MAX
# define MI_INTPTR_SHIFT (3)
#elif INTPTR_MAX == INT32_MAX
# define MI_INTPTR_SHIFT (2)
#else
#error platform pointers must be 32, 64, or 128 bits
#endif
#if (INTPTR_MAX) > LONG_MAX
# define MI_PU(x) x##ULL
#else
# define MI_PU(x) x##UL
#endif
#if SIZE_MAX == UINT64_MAX
# define MI_SIZE_SHIFT (3)
typedef int64_t mi_ssize_t;
#elif SIZE_MAX == UINT32_MAX
# define MI_SIZE_SHIFT (2)
typedef int32_t mi_ssize_t;
#else
#error platform objects must be 32 or 64 bits in size
#endif
#if (SIZE_MAX/2) > LONG_MAX
# define MI_ZU(x) x##ULL
#else
# define MI_ZU(x) x##UL
#endif
#define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT)
#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8)
#define MI_SIZE_SIZE (1<<MI_SIZE_SHIFT)
#define MI_SIZE_BITS (MI_SIZE_SIZE*8)
#define MI_KiB (MI_ZU(1024))
#define MI_MiB (MI_KiB*MI_KiB)
#define MI_GiB (MI_MiB*MI_KiB)
/* --------------------------------------------------------------------------------
Architecture
-------------------------------------------------------------------------------- */
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) // consider arm64ec as arm64
#define MI_ARCH_ARM64 1
#elif defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
#define MI_ARCH_X64 1
#elif defined(__i386__) || defined(__i386) || defined(_M_IX86) || defined(_X86_) || defined(__X86__)
#define MI_ARCH_X86 1
#elif defined(__arm__) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARMT) || defined(__arm)
#define MI_ARCH_ARM32 1
#elif defined(__riscv) || defined(_M_RISCV)
#define MI_ARCH_RISCV 1
#if (LONG_MAX == INT32_MAX)
#define MI_ARCH_RISCV32 1
#else
#define MI_ARCH_RISCV64 1
#endif
#endif
#if MI_ARCH_X64 && defined(__AVX2__)
#include <immintrin.h>
#elif MI_ARCH_ARM64 && MI_OPT_SIMD
#include <arm_neon.h>
#endif
#if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
#include <intrin.h>
#endif
#if MI_ARCH_X64 && defined(__AVX2__) && !defined(__BMI2__) // msvc
#define __BMI2__ 1
#endif
#if MI_ARCH_X64 && (defined(__AVX2__) || defined(__BMI2__)) && !defined(__BMI1__) // msvc
#define __BMI1__ 1
#endif
// Define big endian if needed
// #define MI_BIG_ENDIAN 1
// maximum virtual address bits in a user-space pointer
#if MI_DEFAULT_VIRTUAL_ADDRESS_BITS > 0
#define MI_MAX_VABITS MI_DEFAULT_VIRTUAL_ADDRESS_BITS
#elif MI_ARCH_X64
#define MI_MAX_VABITS (47)
#elif MI_INTPTR_SIZE > 4
#define MI_MAX_VABITS (48)
#else
#define MI_MAX_VABITS (32)
#endif
// use a flat page-map (or a 2-level one)
#ifndef MI_PAGE_MAP_FLAT
#if MI_MAX_VABITS <= 40 && !defined(__APPLE__)
#define MI_PAGE_MAP_FLAT 1
#else
#define MI_PAGE_MAP_FLAT 0
#endif
#endif
/* --------------------------------------------------------------------------------
Builtin's
-------------------------------------------------------------------------------- */
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
#define mi_builtin(name) __builtin_##name
#define mi_has_builtin(name) __has_builtin(__builtin_##name)
#if (LONG_MAX == INT32_MAX)
#define mi_builtin32(name) mi_builtin(name##l)
#define mi_has_builtin32(name) mi_has_builtin(name##l)
#else
#define mi_builtin32(name) mi_builtin(name)
#define mi_has_builtin32(name) mi_has_builtin(name)
#endif
#if (LONG_MAX == INT64_MAX)
#define mi_builtin64(name) mi_builtin(name##l)
#define mi_has_builtin64(name) mi_has_builtin(name##l)
#else
#define mi_builtin64(name) mi_builtin(name##ll)
#define mi_has_builtin64(name) mi_has_builtin(name##ll)
#endif
#if (MI_SIZE_BITS == 32)
#define mi_builtinz(name) mi_builtin32(name)
#define mi_has_builtinz(name) mi_has_builtin32(name)
#define mi_msc_builtinz(name) name
#elif (MI_SIZE_BITS == 64)
#define mi_builtinz(name) mi_builtin64(name)
#define mi_has_builtinz(name) mi_has_builtin64(name)
#define mi_msc_builtinz(name) name##64
#endif
/* --------------------------------------------------------------------------------
Popcount and count trailing/leading zero's
-------------------------------------------------------------------------------- */
size_t _mi_popcount_generic(size_t x);
static inline size_t mi_popcount(size_t x) {
#if mi_has_builtinz(popcount)
return mi_builtinz(popcount)(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
return mi_msc_builtinz(__popcnt)(x);
#elif MI_ARCH_X64 && defined(__BMI1__)
return (size_t)_mm_popcnt_u64(x);
#else
#define MI_HAS_FAST_POPCOUNT 0
return (x<=1 ? x : _mi_popcount_generic(x));
#endif
}
#ifndef MI_HAS_FAST_POPCOUNT
#define MI_HAS_FAST_POPCOUNT 1
#endif
size_t _mi_clz_generic(size_t x);
size_t _mi_ctz_generic(size_t x);
static inline size_t mi_ctz(size_t x) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 tzcnt is defined for 0
size_t r;
__asm ("tzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
return r;
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long idx;
return (mi_msc_builtinz(_BitScanForward)(&idx, x) ? (size_t)idx : MI_SIZE_BITS);
#elif mi_has_builtinz(ctz)
return (x!=0 ? (size_t)mi_builtinz(ctz)(x) : MI_SIZE_BITS);
#elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
if (x==0) return MI_SIZE_BITS;
size_t r;
__asm ("bsf\t%1, %0" : "=r"(r) : "r"(x) : "cc");
return r;
#elif MI_HAS_FAST_POPCOUNT
return (x!=0 ? (mi_popcount(x^(x-1))-1) : MI_SIZE_BITS);
#else
#define MI_HAS_FAST_BITSCAN 0
return (x!=0 ? _mi_ctz_generic(x) : MI_SIZE_BITS);
#endif
}
static inline size_t mi_clz(size_t x) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 lzcnt is defined for 0
size_t r;
__asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
return r;
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long idx;
return (mi_msc_builtinz(_BitScanReverse)(&idx, x) ? MI_SIZE_BITS - 1 - (size_t)idx : MI_SIZE_BITS);
#elif mi_has_builtinz(clz)
return (x!=0 ? (size_t)mi_builtinz(clz)(x) : MI_SIZE_BITS);
#elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
if (x==0) return MI_SIZE_BITS;
size_t r;
__asm ("bsr\t%1, %0" : "=r"(r) : "r"(x) : "cc");
return (MI_SIZE_BITS - 1 - r);
#else
#define MI_HAS_FAST_BITSCAN 0
return (x!=0 ? _mi_clz_generic(x) : MI_SIZE_BITS);
#endif
}
#ifndef MI_HAS_FAST_BITSCAN
#define MI_HAS_FAST_BITSCAN 1
#endif
/* --------------------------------------------------------------------------------
find trailing/leading zero (bit scan forward/reverse)
-------------------------------------------------------------------------------- */
// Bit scan forward: find the least significant bit that is set (i.e. count trailing zero's)
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bsf(size_t x, size_t* idx) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9)
// on x64 the carry flag is set on zero which gives better codegen
bool is_zero;
__asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" );
return !is_zero;
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long i;
return (mi_msc_builtinz(_BitScanForward)(&i, x) ? (*idx = (size_t)i, true) : false);
#else
return (x!=0 ? (*idx = mi_ctz(x), true) : false);
#endif
}
// Bit scan reverse: find the most significant bit that is set
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bsr(size_t x, size_t* idx) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9)
// on x64 the carry flag is set on zero which gives better codegen
bool is_zero;
__asm ("lzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc");
return !is_zero;
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long i;
return (mi_msc_builtinz(_BitScanReverse)(&i, x) ? (*idx = (size_t)i, true) : false);
#else
return (x!=0 ? (*idx = MI_SIZE_BITS - 1 - mi_clz(x), true) : false);
#endif
}
/* --------------------------------------------------------------------------------
rotate
-------------------------------------------------------------------------------- */
static inline size_t mi_rotr(size_t x, size_t r) {
#if (mi_has_builtin(rotateright64) && MI_SIZE_BITS==64)
return mi_builtin(rotateright64)(x,r);
#elif (mi_has_builtin(rotateright32) && MI_SIZE_BITS==32)
return mi_builtin(rotateright32)(x,r);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_ARM64)
return _rotr64(x, (int)r);
#elif defined(_MSC_VER) && (MI_ARCH_X86 || MI_ARCH_ARM32)
return _lrotr(x,(int)r);
#else
// The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
// avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1);
return ((x >> rshift) | (x << ((-rshift) & (MI_SIZE_BITS-1))));
#endif
}
static inline size_t mi_rotl(size_t x, size_t r) {
#if (mi_has_builtin(rotateleft64) && MI_SIZE_BITS==64)
return mi_builtin(rotateleft64)(x,r);
#elif (mi_has_builtin(rotateleft32) && MI_SIZE_BITS==32)
return mi_builtin(rotateleft32)(x,r);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_ARM64)
return _rotl64(x, (int)r);
#elif defined(_MSC_VER) && (MI_ARCH_X86 || MI_ARCH_ARM32)
return _lrotl(x, (int)r);
#else
// The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
// avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1);
return ((x << rshift) | (x >> ((-rshift) & (MI_SIZE_BITS-1))));
#endif
}
static inline uint32_t mi_rotl32(uint32_t x, uint32_t r) {
#if mi_has_builtin(rotateleft32)
return mi_builtin(rotateleft32)(x,r);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
return _lrotl(x, (int)r);
#else
// The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
// avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
const unsigned int rshift = (unsigned int)(r) & 31;
return ((x << rshift) | (x >> ((-rshift) & 31)));
#endif
}
#endif // MI_BITS_H
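As a small usage note for the scan primitives in this new header, the sketch below (not part of bits.h) enumerates the set bits of a word with `mi_bsf`, clearing the lowest set bit each round:

#include <stdio.h>
// assumes "mimalloc/bits.h" has been included

static void print_set_bits(size_t x) {
  size_t idx;
  while (mi_bsf(x, &idx)) {   // returns false once x == 0
    printf("bit %zu is set\n", idx);
    x &= (x - 1);             // clear the least significant set bit
  }
}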

File diff suppressed because it is too large.


@ -5,8 +5,8 @@ terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_PRIM_H
#define MIMALLOC_PRIM_H
#ifndef MI_PRIM_H
#define MI_PRIM_H
// --------------------------------------------------------------------------
@ -117,7 +117,8 @@ void _mi_prim_thread_done_auto_done(void);
// Called when the default heap for a thread changes
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
// Is this thread part of a thread pool?
bool _mi_prim_thread_is_in_threadpool(void);
@ -269,35 +270,42 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce
// defined in `init.c`; do not use these directly
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
extern bool _mi_process_is_initialized; // has mi_process_init been called?
extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called?
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;
static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept;
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
const mi_threadid_t tid = __mi_prim_thread_id();
mi_assert_internal(tid > 1);
mi_assert_internal((tid & MI_PAGE_FLAG_MASK) == 0); // bottom 3 bits are clear?
return tid;
}
// Get a unique id for the current thread.
#if defined(MI_PRIM_THREAD_ID)
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
return MI_PRIM_THREAD_ID(); // used for example by CPython for a free threaded build (see python/cpython#115488)
}
#elif defined(_WIN32)
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
}
#elif MI_USE_BUILTIN_THREAD_POINTER
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
// Works on most Unix based platforms with recent compilers
return (uintptr_t)__builtin_thread_pointer();
}
#elif MI_HAS_TLS_SLOT
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
#if defined(__BIONIC__)
// issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
// see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86
@ -313,7 +321,7 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
#else
// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms).
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
return (uintptr_t)&_mi_heap_default;
}
@ -416,4 +424,4 @@ static inline mi_heap_t* mi_prim_get_default_heap(void) {
#endif // mi_prim_get_default_heap()
#endif // MIMALLOC_PRIM_H
#endif // MI_PRIM_H

View file

@ -5,8 +5,8 @@ terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_TRACK_H
#define MIMALLOC_TRACK_H
#ifndef MI_TRACK_H
#define MI_TRACK_H
/* ------------------------------------------------------------------------------------------------------
Track memory ranges with macros for tools like Valgrind, address sanitizer, or other memory checkers.
@ -142,4 +142,4 @@ defined, undefined, or not accessible at all:
}
#endif
#endif
#endif // MI_TRACK_H

View file

@ -5,17 +5,15 @@ terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_TYPES_H
#define MIMALLOC_TYPES_H
#ifndef MI_TYPES_H
#define MI_TYPES_H
// --------------------------------------------------------------------------
// This file contains the main type definitions for mimalloc:
// mi_heap_t : all data for a thread-local heap, contains
// lists of all managed heap pages.
// mi_segment_t : a larger chunk of memory (32GiB) from where pages
// are allocated.
// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from
// where objects are allocated.
// where objects of a single size are allocated.
// Note: we write "OS page" for OS memory pages while
// using plain "page" for mimalloc pages (`mi_page_t`).
// --------------------------------------------------------------------------
@ -23,11 +21,9 @@ terms of the MIT license. A copy of the license can be found in the file
#include <stddef.h> // ptrdiff_t
#include <stdint.h> // uintptr_t, uint16_t, etc
#include "atomic.h" // _Atomic
#ifdef _MSC_VER
#pragma warning(disable:4214) // bitfield is not int
#endif
#include <errno.h> // error codes
#include "bits.h" // size defines (MI_INTPTR_SIZE etc), bit operations
#include "atomic.h" // _Atomic primitives
// Minimal alignment necessary. On most platforms 16 bytes are needed
// due to SSE registers for example. This must be at least `sizeof(void*)`
@ -50,11 +46,17 @@ terms of the MIT license. A copy of the license can be found in the file
// Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance).
// #define MI_STAT 1
// Define MI_SECURE to enable security mitigations
// #define MI_SECURE 1 // guard page around metadata
// #define MI_SECURE 2 // guard page around each mimalloc page
// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free)
// #define MI_SECURE 4 // checks for double free. (may be more expensive)
// Define MI_SECURE to enable security mitigations. Level 1 has minimal performance impact,
// but protects most metadata with guard pages:
// #define MI_SECURE 1 // guard page around metadata
//
// Level 2 has more performance impact but protects well against various buffer overflows
// by surrounding all mimalloc pages with guard pages:
// #define MI_SECURE 2 // guard page around each mimalloc page (can fragment VMA's with large heaps..)
//
// The next two levels can have more performance cost:
// #define MI_SECURE 3 // randomize allocations, encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free)
// #define MI_SECURE 4 // checks for double free. (may be more expensive)
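// For example (an illustrative invocation, not part of this diff), a build with all
// mitigations enabled would define the level on the compiler command line:
//   cc -O2 -DMI_SECURE=4 -I include -c src/static.c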
#if !defined(MI_SECURE)
#define MI_SECURE 0
@ -97,124 +99,130 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_ENCODE_FREELIST 1
#endif
// Enable large pages for objects between 128KiB and 512KiB. Disabled by default.
#ifndef MI_ENABLE_LARGE_PAGES
#define MI_ENABLE_LARGE_PAGES 0
#endif
// We used to abandon huge pages in order to eagerly deallocate them if freed from another thread.
// Unfortunately, that makes it impossible to visit them during a heap walk or include them in a
// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks nowadays if freed from
// another thread so the memory becomes "virtually" available (and eventually gets properly freed by
// the owning thread).
// #define MI_HUGE_PAGE_ABANDON 1
// --------------------------------------------------------------
// Sizes of internal data-structures
// (comments specify sizes on 64-bit, usually 32-bit is halved)
// --------------------------------------------------------------
// ------------------------------------------------------
// Platform specific values
// ------------------------------------------------------
// ------------------------------------------------------
// Size of a pointer.
// We assume that `sizeof(void*)==sizeof(intptr_t)`
// and it holds for all platforms we know of.
//
// However, the C standard only requires that:
// p == (void*)((intptr_t)p))
// but we also need:
// i == (intptr_t)((void*)i)
// or otherwise one might define an intptr_t type that is larger than a pointer...
// ------------------------------------------------------
#if INTPTR_MAX > INT64_MAX
# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example)
#elif INTPTR_MAX == INT64_MAX
# define MI_INTPTR_SHIFT (3)
#elif INTPTR_MAX == INT32_MAX
# define MI_INTPTR_SHIFT (2)
// Sizes are for 64-bit
#ifndef MI_ARENA_SLICE_SHIFT
#ifdef MI_SMALL_PAGE_SHIFT // backward compatibility
#define MI_ARENA_SLICE_SHIFT MI_SMALL_PAGE_SHIFT
#else
#error platform pointers must be 32, 64, or 128 bits
#define MI_ARENA_SLICE_SHIFT (13 + MI_SIZE_SHIFT) // 64 KiB (32 KiB on 32-bit)
#endif
#endif
#ifndef MI_BCHUNK_BITS_SHIFT
#define MI_BCHUNK_BITS_SHIFT (6 + MI_SIZE_SHIFT) // optimized for 512 bits per chunk (avx512)
#endif
#if SIZE_MAX == UINT64_MAX
# define MI_SIZE_SHIFT (3)
typedef int64_t mi_ssize_t;
#elif SIZE_MAX == UINT32_MAX
# define MI_SIZE_SHIFT (2)
typedef int32_t mi_ssize_t;
#else
#error platform objects must be 32 or 64 bits
#endif
#define MI_BCHUNK_BITS (1 << MI_BCHUNK_BITS_SHIFT) // sub-bitmaps are "bchunks" of 512 bits
#define MI_ARENA_SLICE_SIZE (MI_ZU(1) << MI_ARENA_SLICE_SHIFT) // arena's allocate in slices of 64 KiB
#define MI_ARENA_SLICE_ALIGN (MI_ARENA_SLICE_SIZE)
#if (SIZE_MAX/2) > LONG_MAX
# define MI_ZU(x) x##ULL
# define MI_ZI(x) x##LL
#else
# define MI_ZU(x) x##UL
# define MI_ZI(x) x##L
#endif
#define MI_ARENA_MIN_OBJ_SLICES (1)
#define MI_ARENA_MAX_OBJ_SLICES (MI_BCHUNK_BITS) // 32 MiB (for now, cannot cross chunk boundaries)
#define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT)
#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8)
#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_MIN_OBJ_SLICES * MI_ARENA_SLICE_SIZE)
#define MI_ARENA_MAX_OBJ_SIZE (MI_ARENA_MAX_OBJ_SLICES * MI_ARENA_SLICE_SIZE)
#define MI_SIZE_SIZE (1<<MI_SIZE_SHIFT)
#define MI_SIZE_BITS (MI_SIZE_SIZE*8)
#define MI_SMALL_PAGE_SIZE MI_ARENA_MIN_OBJ_SIZE // 64 KiB
#define MI_MEDIUM_PAGE_SIZE (8*MI_SMALL_PAGE_SIZE) // 512 KiB (=byte in the bchunk bitmap)
#define MI_LARGE_PAGE_SIZE (MI_SIZE_SIZE*MI_MEDIUM_PAGE_SIZE) // 4 MiB (=word in the bchunk bitmap)
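// [illustrative arithmetic, not part of this diff] on 64-bit (MI_SIZE_SHIFT == 3) the
// definitions above work out to:
//   MI_ARENA_SLICE_SIZE   = 1 << (13+3)  = 64 KiB
//   MI_BCHUNK_BITS        = 1 << (6+3)   = 512 bits per bitmap chunk
//   MI_ARENA_MAX_OBJ_SIZE = 512 * 64 KiB = 32 MiB
//   MI_SMALL_PAGE_SIZE = 64 KiB, MI_MEDIUM_PAGE_SIZE = 512 KiB, MI_LARGE_PAGE_SIZE = 8 * 512 KiB = 4 MiB
// so a medium page corresponds to a byte, and a large page to a word, in a bchunk bitmap.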
#define MI_KiB (MI_ZU(1024))
#define MI_MiB (MI_KiB*MI_KiB)
#define MI_GiB (MI_MiB*MI_KiB)
// ------------------------------------------------------
// Main internal data-structures
// ------------------------------------------------------
// Main tuning parameters for segment and page sizes
// Sizes for 64-bit, divide by two for 32-bit
#ifndef MI_SMALL_PAGE_SHIFT
#define MI_SMALL_PAGE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB
#endif
#ifndef MI_MEDIUM_PAGE_SHIFT
#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB
#endif
#ifndef MI_LARGE_PAGE_SHIFT
#define MI_LARGE_PAGE_SHIFT ( 3 + MI_MEDIUM_PAGE_SHIFT) // 4MiB
#endif
#ifndef MI_SEGMENT_SHIFT
#define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4MiB -- must be equal to `MI_LARGE_PAGE_SHIFT`
#endif
// Derived constants
#define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT)
#define MI_SEGMENT_ALIGN (MI_SEGMENT_SIZE)
#define MI_SEGMENT_MASK ((uintptr_t)(MI_SEGMENT_ALIGN - 1))
#define MI_SMALL_PAGE_SIZE (MI_ZU(1)<<MI_SMALL_PAGE_SHIFT)
#define MI_MEDIUM_PAGE_SIZE (MI_ZU(1)<<MI_MEDIUM_PAGE_SHIFT)
#define MI_LARGE_PAGE_SIZE (MI_ZU(1)<<MI_LARGE_PAGE_SHIFT)
#define MI_SMALL_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_SMALL_PAGE_SIZE)
#define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE)
#define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE)
// The max object sizes are checked so that we do not waste more than 12.5% internally over the page sizes.
// (Except for large pages since huge objects are allocated in 4MiB chunks)
#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 16KiB
#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB
#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2MiB
#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
// Maximum number of size classes. (spaced exponentially in 12.5% increments)
#define MI_BIN_HUGE (73U)
#define MI_BIN_FULL (MI_BIN_HUGE+1)
#define MI_BIN_COUNT (MI_BIN_FULL+1)
#if (MI_LARGE_OBJ_WSIZE_MAX >= 655360)
#error "mimalloc internal: define more bins"
#endif
// Maximum block size for which blocks are guaranteed to be block size aligned. (see `segment.c:_mi_segment_page_start`)
#define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX)
// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments
#define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
// We never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX
#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX
// Minimal commit for a page on-demand commit (should be >= OS page size)
#define MI_PAGE_MIN_COMMIT_SIZE MI_ARENA_SLICE_SIZE // (4*MI_KiB)
// ------------------------------------------------------
// Arena's are large reserved areas of memory allocated from
// the OS that are managed by mimalloc to efficiently
// allocate MI_ARENA_SLICE_SIZE slices of memory for the
// mimalloc pages.
// ------------------------------------------------------
// A large memory arena where pages are allocated in.
typedef struct mi_arena_s mi_arena_t; // defined in `arena.c`
// ---------------------------------------------------------------
// a memory id tracks the provenance of arena/OS allocated memory
// ---------------------------------------------------------------
// Memory can reside in arena's, direct OS allocated, meta-data pages, or statically allocated.
// The memid keeps track of this.
typedef enum mi_memkind_e {
MI_MEM_NONE, // not allocated
MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
MI_MEM_STATIC, // allocated in a static area and should not be freed (the initial main heap data for example (`init.c`))
MI_MEM_META, // allocated with the meta data allocator (`arena-meta.c`)
MI_MEM_OS, // allocated from the OS
MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
MI_MEM_OS_REMAP, // allocated in a remappable area (i.e. using `mremap`)
MI_MEM_ARENA // allocated from an arena (the usual case) (`arena.c`)
} mi_memkind_t;
static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP);
}
static inline bool mi_memkind_needs_no_free(mi_memkind_t memkind) {
return (memkind <= MI_MEM_STATIC);
}
typedef struct mi_memid_os_info {
void* base; // actual base address of the block (used for offset aligned allocations)
size_t size; // allocated full size
// size_t alignment; // alignment at allocation
} mi_memid_os_info_t;
typedef struct mi_memid_arena_info {
mi_arena_t* arena; // arena that contains this memory
uint32_t slice_index; // slice index in the arena
uint32_t slice_count; // allocated slices
} mi_memid_arena_info_t;
typedef struct mi_memid_meta_info {
void* meta_page; // meta-page that contains the block
uint32_t block_index; // block index in the meta-data page
uint32_t block_count; // allocated blocks
} mi_memid_meta_info_t;
typedef struct mi_memid_s {
union {
mi_memid_os_info_t os; // only used for MI_MEM_OS
mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
mi_memid_meta_info_t meta; // only used for MI_MEM_META
} mem;
mi_memkind_t memkind;
bool is_pinned; // `true` if we cannot decommit/reset/protect this memory (e.g. when allocated using large (2MiB) or huge (1GiB) OS pages)
bool initially_committed;// `true` if the memory was originally allocated as committed
bool initially_zero; // `true` if the memory was originally zero initialized
} mi_memid_t;
static inline bool mi_memid_is_os(mi_memid_t memid) {
return mi_memkind_is_os(memid.memkind);
}
static inline bool mi_memid_needs_no_free(mi_memid_t memid) {
return mi_memkind_needs_no_free(memid.memkind);
}
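// [illustrative sketch, not part of this diff] how a memid might be filled in for plain
// OS-allocated memory and dispatched on when freeing; `mi_example_memid_create_os` is a
// hypothetical helper used only for illustration.
static inline mi_memid_t mi_example_memid_create_os(void* base, size_t size, bool committed, bool zeroed, bool pinned) {
  mi_memid_t memid;
  memid.memkind = MI_MEM_OS;
  memid.mem.os.base = base;                 // remember the real base for offset-aligned allocations
  memid.mem.os.size = size;
  memid.is_pinned = pinned;                 // e.g. true for large/huge OS pages
  memid.initially_committed = committed;
  memid.initially_zero = zeroed;
  return memid;
}
// on free one would dispatch on the kind, roughly:
//   if (mi_memid_needs_no_free(memid))  { /* none/external/static: nothing to do */ }
//   else if (mi_memid_is_os(memid))     { /* give memid.mem.os.base/size back to the OS */ }
//   else                                { /* MI_MEM_ARENA or MI_MEM_META: release the slices/blocks */ }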
// ------------------------------------------------------
// Mimalloc pages contain allocated blocks
@ -232,48 +240,28 @@ typedef struct mi_block_s {
mi_encoded_t next;
} mi_block_t;
#if MI_GUARDED
// we always align guarded pointers in a block at an offset
// the block `next` field is then used as a tag to distinguish regular offset aligned blocks from guarded ones
#define MI_BLOCK_TAG_ALIGNED ((mi_encoded_t)(0))
#define MI_BLOCK_TAG_GUARDED (~MI_BLOCK_TAG_ALIGNED)
#endif
// The `in_full` and `has_aligned` page flags are put in the bottom bits of the thread_id (for fast test in `mi_free`)
// `has_aligned` is true if the page has pointers at an offset in a block (so we unalign before free-ing)
// `in_full_queue` is true if the page is full and resides in the full queue (so we move it to a regular queue on free-ing)
#define MI_PAGE_IN_FULL_QUEUE MI_ZU(0x01)
#define MI_PAGE_HAS_ALIGNED MI_ZU(0x02)
#define MI_PAGE_IS_ABANDONED_MAPPED MI_ZU(0x04)
#define MI_PAGE_FLAG_MASK MI_ZU(0x07)
typedef size_t mi_page_flags_t;
// The delayed flags are used for efficient multi-threaded free-ing
typedef enum mi_delayed_e {
MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list
MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap
MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list
MI_NEVER_DELAYED_FREE = 3 // sticky: used for abandoned pages without an owning heap; this only resets on page reclaim
} mi_delayed_t;
// The `in_full` and `has_aligned` page flags are put in a union to efficiently
// test if both are false (`full_aligned == 0`) in the `mi_free` routine.
#if !MI_TSAN
typedef union mi_page_flags_s {
uint8_t full_aligned;
struct {
uint8_t in_full : 1;
uint8_t has_aligned : 1;
} x;
} mi_page_flags_t;
#else
// under thread sanitizer, use a byte for each flag to suppress warning, issue #130
typedef union mi_page_flags_s {
uint32_t full_aligned;
struct {
uint8_t in_full;
uint8_t has_aligned;
} x;
} mi_page_flags_t;
#endif
// Thread free list.
// We use the bottom 2 bits of the pointer for mi_delayed_t flags
// Points to a list of blocks that are freed by other threads.
// The low-bit is set if the page is owned by the current thread. (`mi_page_is_owned`).
// Ownership is required before we can read any non-atomic fields in the page.
// This way we can push a block on the thread free list and try to claim ownership
// atomically in `free.c:mi_free_block_mt`.
typedef uintptr_t mi_thread_free_t;
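// [illustrative sketch, not part of this diff] pushing a freed block onto a thread free
// list with a compare-and-swap while preserving the ownership bit (bit 0); free-list
// encoding is ignored and the helper name is hypothetical (atomics from `mimalloc/atomic.h`).
static inline void mi_example_thread_free_push(_Atomic(mi_thread_free_t)* xthread_free, mi_block_t* block) {
  mi_thread_free_t tf_old = mi_atomic_load_relaxed(xthread_free);
  mi_thread_free_t tf_new;
  do {
    block->next = (mi_encoded_t)(tf_old & ~(mi_thread_free_t)1);  // link to the current head (mask out the ownership bit)
    tf_new = (mi_thread_free_t)block | (tf_old & 1);              // new head, ownership bit unchanged
  } while (!mi_atomic_cas_weak_release(xthread_free, &tf_old, tf_new));
}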
// A heap can serve only specific objects signified by its heap tag (e.g. various object types in CPython)
typedef uint8_t mi_heaptag_t;
// A page contains blocks of one specific size (`block_size`).
// Each page has three lists of free blocks:
// `free` for blocks that can be allocated,
@ -291,160 +279,93 @@ typedef uintptr_t mi_thread_free_t;
// the number of memory accesses in the `mi_page_all_free` function(s).
//
// Notes:
// - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
// - Non-atomic fields can only be accessed if having ownership (low bit of `xthread_free`).
// - If a page is not part of a heap it is called "abandoned" (`heap==NULL`) -- in
// that case the `xthread_id` is 0 or 1 (1 is for abandoned pages that
// are in the abandoned page lists of an arena; these are called "mapped" abandoned pages).
// - The layout is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
// - Using `uint16_t` does not seem to slow things down
// - The size is 10 words on 64-bit which helps the page index calculations
// (and 12 words on 32-bit, and encoded free lists add 2 words)
// - `xthread_free` uses the bottom bits as a delayed-free flags to optimize
// concurrent frees where only the first concurrent free adds to the owning
// heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`).
// The invariant is that no-delayed-free is only set if there is
// at least one block that will be added, or has already been added, to
// the owning heap `thread_delayed_free` list. This guarantees that pages
// will be freed correctly even if only other threads free blocks.
typedef struct mi_page_s {
// "owned" by the segment
uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]`
uint8_t segment_in_use:1; // `true` if the segment allocated this page
uint8_t is_committed:1; // `true` if the page virtual memory is committed
uint8_t is_zero_init:1; // `true` if the page was initially zero initialized
uint8_t is_huge:1; // `true` if the page is in a huge segment
_Atomic(mi_threadid_t) xthread_id; // thread this page belongs to. (= heap->thread_id, or 0 or 1 if abandoned)
// layout like this to optimize access in `mi_malloc` and `mi_free`
uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
uint16_t reserved; // number of blocks reserved in memory
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized
uint8_t retire_expire:7; // expiration count for retired blocks
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
uint16_t used; // number of blocks in use (including blocks in `thread_free`)
uint16_t capacity; // number of blocks committed (must be the first field for proper zero-initialisation)
uint16_t reserved; // number of blocks reserved in memory
uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
uint8_t retire_expire; // expiration count for retired blocks
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
uint16_t used; // number of blocks in use (including blocks in `thread_free`)
uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
uint8_t heap_tag; // tag of the owning heap, used to separate heaps by object type
// padding
size_t block_size; // size available in each block (always `>0`)
uint8_t* page_start; // start of the page area containing the blocks
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
size_t block_size; // size available in each block (always `>0`)
uint8_t* page_start; // start of the blocks
mi_heaptag_t heap_tag; // tag of the owning heap, used to separate heaps by object type
bool free_is_zero; // `true` if the blocks in the free list are zero initialized
// padding
#if (MI_ENCODE_FREELIST || MI_PADDING)
uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
#endif
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
_Atomic(uintptr_t) xheap;
struct mi_page_s* next; // next page owned by the heap with the same `block_size`
struct mi_page_s* prev; // previous page owned by the heap with the same `block_size`
#if MI_INTPTR_SIZE==4 // pad to 12 words on 32-bit
void* padding[1];
#endif
mi_heap_t* heap; // the heap owning this page (or NULL for abandoned pages)
struct mi_page_s* next; // next page owned by the heap with the same `block_size`
struct mi_page_s* prev; // previous page owned by the heap with the same `block_size`
size_t slice_committed; // committed size relative to the first arena slice of the page data (or 0 if the page is fully committed already)
mi_memid_t memid; // provenance of the page memory
} mi_page_t;
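// [illustrative sketch, not part of this diff] two consequences of the layout above; the
// helper names are hypothetical and use plain reads of the atomic field for brevity:
static inline bool mi_example_page_is_plain_and_local(mi_page_t* page, mi_threadid_t tid) {
  return (page->xthread_id == tid);   // same thread id and no flag bits set (see MI_PAGE_FLAG_MASK)
}
static inline bool mi_example_page_all_free(mi_page_t* page) {
  return (page->used == 0);           // `used` already includes blocks still on `xthread_free`
}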
// ------------------------------------------------------
// Object sizes
// ------------------------------------------------------
#define MI_PAGE_ALIGN MI_ARENA_SLICE_ALIGN // pages must be aligned on this for the page map.
#define MI_PAGE_MIN_START_BLOCK_ALIGN MI_MAX_ALIGN_SIZE // minimal block alignment for the first block in a page (16b)
#define MI_PAGE_MAX_START_BLOCK_ALIGN2 MI_KiB // maximal block alignment for "power of 2"-sized blocks (such that we guarantee natural alignment)
#define MI_PAGE_MAX_OVERALLOC_ALIGN MI_ARENA_SLICE_SIZE // (64 KiB) limit for which we overallocate in arena pages, beyond this use OS allocation
#if (MI_ENCODE_FREELIST || MI_PADDING) && MI_SIZE_SIZE == 8
#define MI_PAGE_INFO_SIZE ((MI_INTPTR_SHIFT+2)*32) // 160 >= sizeof(mi_page_t)
#else
#define MI_PAGE_INFO_SIZE ((MI_INTPTR_SHIFT+1)*32) // 128/96 >= sizeof(mi_page_t)
#endif
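// [illustrative arithmetic, not part of this diff] on 64-bit MI_INTPTR_SHIFT == 3, so the
// page info size is (3+2)*32 = 160 bytes when free lists are encoded (or padded), and
// (3+1)*32 = 128 bytes otherwise (96 bytes on 32-bit); in each case a multiple of 32 bytes
// that is at least sizeof(mi_page_t), matching the comments above.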
// The max object sizes are checked so that we do not waste more than 12.5% internally over the page sizes.
// (Except for large pages since huge objects are allocated in 4MiB chunks)
#define MI_SMALL_MAX_OBJ_SIZE ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < 8 KiB
#define MI_MEDIUM_MAX_OBJ_SIZE ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < 64 KiB
#define MI_LARGE_MAX_OBJ_SIZE (MI_LARGE_PAGE_SIZE/4) // <= 512 KiB // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
#define MI_LARGE_MAX_OBJ_WSIZE (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE)
#if (MI_LARGE_MAX_OBJ_WSIZE >= 655360)
#error "mimalloc internal: define more bins"
#endif
// ------------------------------------------------------
// Mimalloc segments contain mimalloc pages
// Page kinds
// ------------------------------------------------------
typedef enum mi_page_kind_e {
MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment
MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages inside a segment
MI_PAGE_LARGE, // larger blocks go into a single page spanning a whole segment
MI_PAGE_HUGE // a huge page is a single page in a segment of variable size (but still 2MiB aligned)
// used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an alignment `> MI_BLOCK_ALIGNMENT_MAX`.
MI_PAGE_SMALL, // small blocks go into 64KiB pages
MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages
MI_PAGE_LARGE, // larger blocks go into 4MiB pages
MI_PAGE_SINGLETON // page containing a single block.
// used for blocks `> MI_LARGE_MAX_OBJ_SIZE` or an alignment `> MI_PAGE_MAX_OVERALLOC_ALIGN`.
} mi_page_kind_t;
// ---------------------------------------------------------------
// a memory id tracks the provenance of arena/OS allocated memory
// ---------------------------------------------------------------
// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this.
typedef enum mi_memkind_e {
MI_MEM_NONE, // not allocated
MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example)
MI_MEM_OS, // allocated from the OS
MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
MI_MEM_OS_REMAP, // allocated in a remappable area (i.e. using `mremap`)
MI_MEM_ARENA // allocated from an arena (the usual case)
} mi_memkind_t;
static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP);
}
typedef struct mi_memid_os_info {
void* base; // actual base address of the block (used for offset aligned allocations)
size_t size; // full allocation size
} mi_memid_os_info_t;
typedef struct mi_memid_arena_info {
size_t block_index; // index in the arena
mi_arena_id_t id; // arena id (>= 1)
bool is_exclusive; // this arena can only be used for specific arena allocations
} mi_memid_arena_info_t;
typedef struct mi_memid_s {
union {
mi_memid_os_info_t os; // only used for MI_MEM_OS
mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
} mem;
bool is_pinned; // `true` if we cannot decommit/reset/protect this memory (e.g. when allocated using large (2MiB) or huge (1GiB) OS pages)
bool initially_committed;// `true` if the memory was originally allocated as committed
bool initially_zero; // `true` if the memory was originally zero initialized
mi_memkind_t memkind;
} mi_memid_t;
// ---------------------------------------------------------------
// Segments contain mimalloc pages
// ---------------------------------------------------------------
typedef struct mi_subproc_s mi_subproc_t;
// Segments are large allocated memory blocks (2MiB on 64 bit) from the OS.
// Inside segments we allocate fixed size _pages_ that contain blocks.
typedef struct mi_segment_s {
// constant fields
mi_memid_t memid; // memory id to track provenance
bool allow_decommit;
bool allow_purge;
size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE`
mi_subproc_t* subproc; // segment belongs to sub process
// segment fields
struct mi_segment_s* next; // must be the first (non-constant) segment field -- see `segment.c:segment_init`
struct mi_segment_s* prev;
bool was_reclaimed; // true if it was reclaimed (used to limit reclaim-on-free reclamation)
bool dont_free; // can be temporarily true to ensure the segment is not freed
size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
size_t abandoned_visits; // count how often this segment is visited for reclaiming (to force reclaim if it is too long)
size_t used; // count of pages in use (`used <= capacity`)
size_t capacity; // count of available pages (`#free + used`)
size_t segment_info_size;// space we are using from the first page for segment meta-data and possible guard pages.
uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie`
struct mi_segment_s* abandoned_os_next; // only used for abandoned segments outside arena's, and only if `mi_option_visit_abandoned` is enabled
struct mi_segment_s* abandoned_os_prev;
// layout like this to optimize access in `mi_free`
_Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
mi_page_kind_t page_kind; // kind of pages: small, medium, large, or huge
mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages
} mi_segment_t;
// ------------------------------------------------------
// Heaps
//
// Provide first-class heaps to allocate from.
// A heap just owns a set of pages for allocation and
// can only allocate/reallocate from the thread that created it.
// Freeing blocks can be done from any thread though.
// Per thread, the segments are shared among its heaps.
//
// Per thread, there is always a default heap that is
// used for allocation; it is initialized to statically
// point to an empty heap to avoid initialization checks
@ -461,8 +382,6 @@ typedef struct mi_page_queue_s {
size_t block_size;
} mi_page_queue_t;
#define MI_BIN_FULL (MI_BIN_HUGE+1)
// Random context
typedef struct mi_random_cxt_s {
uint32_t input[16];
@ -473,7 +392,7 @@ typedef struct mi_random_cxt_s {
// In debug mode there is a padding structure at the end of the blocks to check for buffer overflows
#if (MI_PADDING)
#if MI_PADDING
typedef struct mi_padding_s {
uint32_t canary; // encoded block value to check validity of the padding (in case of overflow)
uint32_t delta; // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes)
@ -490,18 +409,18 @@ typedef struct mi_padding_s {
// A heap owns a set of pages.
struct mi_heap_s {
mi_tld_t* tld;
_Atomic(mi_block_t*) thread_delayed_free;
mi_threadid_t thread_id; // thread this heap belongs to
mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0)
mi_tld_t* tld; // thread-local data
mi_arena_t* exclusive_arena; // if the heap should only allocate from a specific arena (or NULL)
uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list
mi_random_ctx_t random; // random number context used for secure allocation
size_t page_count; // total number of pages in the `pages` queues.
size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues)
size_t page_retired_max; // largest retired index into the `pages` array.
size_t generic_count; // how often is mimalloc_generic invoked?
mi_heap_t* next; // list of heaps per thread
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages
long full_page_retain; // how many full pages can be retained per queue (before abandoning them)
bool allow_page_reclaim; // `true` if this heap can reclaim abandoned pages
bool allow_page_abandon; // `true` if this heap can abandon pages to reduce memory footprint
uint8_t tag; // custom tag, can be used for separating heaps based on the object types
#if MI_GUARDED
size_t guarded_size_min; // minimal size for guarded objects
@ -511,45 +430,11 @@ struct mi_heap_s {
size_t guarded_sample_count; // current sample count (counting down to 0)
#endif
mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points to a page with possibly free blocks in the corresponding queue for that size.
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
mi_page_queue_t pages[MI_BIN_COUNT]; // queue of pages for each size class (or "bin")
mi_memid_t memid; // provenance of the heap struct itself (meta or os)
};
// ------------------------------------------------------
// Debug
// ------------------------------------------------------
#if !defined(MI_DEBUG_UNINIT)
#define MI_DEBUG_UNINIT (0xD0)
#endif
#if !defined(MI_DEBUG_FREED)
#define MI_DEBUG_FREED (0xDF)
#endif
#if !defined(MI_DEBUG_PADDING)
#define MI_DEBUG_PADDING (0xDE)
#endif
#if (MI_DEBUG)
// use our own assertion to print without memory allocation
void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func );
#define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__))
#else
#define mi_assert(x)
#endif
#if (MI_DEBUG>1)
#define mi_assert_internal mi_assert
#else
#define mi_assert_internal(x)
#endif
#if (MI_DEBUG>2)
#define mi_assert_expensive mi_assert
#else
#define mi_assert_expensive(x)
#endif
// ------------------------------------------------------
// Statistics
// ------------------------------------------------------
@ -575,82 +460,118 @@ typedef struct mi_stat_counter_s {
} mi_stat_counter_t;
typedef struct mi_stats_s {
mi_stat_count_t segments;
mi_stat_count_t pages;
mi_stat_count_t reserved;
mi_stat_count_t committed;
mi_stat_count_t reset;
mi_stat_count_t purged;
mi_stat_count_t page_committed;
mi_stat_count_t segments_abandoned;
mi_stat_count_t pages_abandoned;
mi_stat_count_t threads;
mi_stat_count_t normal;
mi_stat_count_t huge;
mi_stat_count_t giant;
mi_stat_count_t malloc;
mi_stat_count_t segments_cache;
mi_stat_count_t pages;
mi_stat_count_t reserved;
mi_stat_count_t committed;
mi_stat_count_t reset;
mi_stat_count_t purged;
mi_stat_count_t page_committed;
mi_stat_count_t pages_abandoned;
mi_stat_count_t threads;
mi_stat_count_t normal;
mi_stat_count_t huge;
mi_stat_count_t giant;
mi_stat_count_t malloc;
mi_stat_counter_t pages_extended;
mi_stat_counter_t pages_reclaim_on_alloc;
mi_stat_counter_t pages_reclaim_on_free;
mi_stat_counter_t pages_reabandon_full;
mi_stat_counter_t pages_unabandon_busy_wait;
mi_stat_counter_t mmap_calls;
mi_stat_counter_t commit_calls;
mi_stat_counter_t reset_calls;
mi_stat_counter_t purge_calls;
mi_stat_counter_t arena_purges;
mi_stat_counter_t page_no_retire;
mi_stat_counter_t searches;
mi_stat_counter_t normal_count;
mi_stat_counter_t huge_count;
mi_stat_counter_t arena_count;
mi_stat_counter_t arena_crossover_count;
mi_stat_counter_t arena_rollback_count;
mi_stat_counter_t guarded_alloc_count;
#if MI_STAT>1
mi_stat_count_t normal_bins[MI_BIN_HUGE+1];
mi_stat_count_t normal_bins[MI_BIN_COUNT];
#endif
} mi_stats_t;
// add to stat keeping track of the peak
void _mi_stat_increase(mi_stat_count_t* stat, size_t amount);
void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount);
void __mi_stat_increase(mi_stat_count_t* stat, size_t amount);
void __mi_stat_decrease(mi_stat_count_t* stat, size_t amount);
void __mi_stat_increase_mt(mi_stat_count_t* stat, size_t amount);
void __mi_stat_decrease_mt(mi_stat_count_t* stat, size_t amount);
// adjust stat in special cases to compensate for double counting
void _mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount);
void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount);
void __mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount, bool on_alloc);
void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount, bool on_free);
void __mi_stat_adjust_increase_mt(mi_stat_count_t* stat, size_t amount, bool on_alloc);
void __mi_stat_adjust_decrease_mt(mi_stat_count_t* stat, size_t amount, bool on_free);
// counters can just be increased
void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
void __mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount);
#if (MI_STAT)
#define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount)
#define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount)
#define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount)
#define mi_stat_adjust_increase(stat,amount) _mi_stat_adjust_increase( &(stat), amount)
#define mi_stat_adjust_decrease(stat,amount) _mi_stat_adjust_decrease( &(stat), amount)
#define mi_debug_stat_increase(stat,amount) __mi_stat_increase( &(stat), amount)
#define mi_debug_stat_decrease(stat,amount) __mi_stat_decrease( &(stat), amount)
#define mi_debug_stat_counter_increase(stat,amount) __mi_stat_counter_increase( &(stat), amount)
#define mi_debug_stat_increase_mt(stat,amount) __mi_stat_increase_mt( &(stat), amount)
#define mi_debug_stat_decrease_mt(stat,amount) __mi_stat_decrease_mt( &(stat), amount)
#define mi_debug_stat_counter_increase_mt(stat,amount) __mi_stat_counter_increase_mt( &(stat), amount)
#define mi_debug_stat_adjust_increase_mt(stat,amnt,b) __mi_stat_adjust_increase_mt( &(stat), amnt, b)
#define mi_debug_stat_adjust_decrease_mt(stat,amnt,b) __mi_stat_adjust_decrease_mt( &(stat), amnt, b)
#else
#define mi_stat_increase(stat,amount) ((void)0)
#define mi_stat_decrease(stat,amount) ((void)0)
#define mi_stat_counter_increase(stat,amount) ((void)0)
#define mi_stat_adjust_increase(stat,amount) ((void)0)
#define mi_stat_adjust_decrease(stat,amount) ((void)0)
#define mi_debug_stat_increase(stat,amount) ((void)0)
#define mi_debug_stat_decrease(stat,amount) ((void)0)
#define mi_debug_stat_counter_increase(stat,amount) ((void)0)
#define mi_debug_stat_increase_mt(stat,amount) ((void)0)
#define mi_debug_stat_decrease_mt(stat,amount) ((void)0)
#define mi_debug_stat_counter_increase_mt(stat,amount) ((void)0)
#define mi_debug_stat_adjust_increase(stat,amnt,b) ((void)0)
#define mi_debug_stat_adjust_decrease(stat,amnt,b) ((void)0)
#endif
#define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount)
#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount)
#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount)
#define mi_subproc_stat_counter_increase(subproc,stat,amount) __mi_stat_counter_increase_mt( &(subproc)->stats.stat, amount)
#define mi_subproc_stat_increase(subproc,stat,amount) __mi_stat_increase_mt( &(subproc)->stats.stat, amount)
#define mi_subproc_stat_decrease(subproc,stat,amount) __mi_stat_decrease_mt( &(subproc)->stats.stat, amount)
#define mi_subproc_stat_adjust_increase(subproc,stat,amnt,b) __mi_stat_adjust_increase_mt( &(subproc)->stats.stat, amnt, b)
#define mi_subproc_stat_adjust_decrease(subproc,stat,amnt,b) __mi_stat_adjust_decrease_mt( &(subproc)->stats.stat, amnt, b)
#define mi_os_stat_counter_increase(stat,amount) mi_subproc_stat_counter_increase(_mi_subproc(),stat,amount)
#define mi_os_stat_increase(stat,amount) mi_subproc_stat_increase(_mi_subproc(),stat,amount)
#define mi_os_stat_decrease(stat,amount) mi_subproc_stat_decrease(_mi_subproc(),stat,amount)
#define mi_heap_stat_counter_increase(heap,stat,amount) __mi_stat_counter_increase( &(heap)->tld->stats.stat, amount)
#define mi_heap_stat_increase(heap,stat,amount) __mi_stat_increase( &(heap)->tld->stats.stat, amount)
#define mi_heap_stat_decrease(heap,stat,amount) __mi_stat_decrease( &(heap)->tld->stats.stat, amount)
#define mi_debug_heap_stat_counter_increase(heap,stat,amount) mi_debug_stat_counter_increase( (heap)->tld->stats.stat, amount)
#define mi_debug_heap_stat_increase(heap,stat,amount) mi_debug_stat_increase( (heap)->tld->stats.stat, amount)
#define mi_debug_heap_stat_decrease(heap,stat,amount) mi_debug_stat_decrease( (heap)->tld->stats.stat, amount)
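// [illustrative usage, not part of this diff] the heap variants update the thread-local
// (tld) statistics without atomics, the subproc variants update the shared sub-process
// statistics through the `_mt` (atomic) functions, and the `mi_debug_stat_*` variants
// compile away when MI_STAT is 0. For example:
//   mi_heap_stat_increase(heap, normal, bsize);                  // per-thread, non-atomic
//   mi_subproc_stat_counter_increase(subproc, arena_purges, 1);  // shared, atomic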
// ------------------------------------------------------
// Sub processes do not reclaim or visit segments
// from other sub processes
// Sub processes use separate arena's and no heaps/pages/blocks
// are shared between sub processes.
// The subprocess structure contains essentially all static variables (except per subprocess :-))
//
// Each thread should belong to one sub-process only
// ------------------------------------------------------
struct mi_subproc_s {
_Atomic(size_t) abandoned_count; // count of abandoned segments for this sub-process
_Atomic(size_t) abandoned_os_list_count; // count of abandoned segments in the os-list
mi_lock_t abandoned_os_lock; // lock for the abandoned os segment list (outside of arena's) (this lock protect list operations)
mi_lock_t abandoned_os_visit_lock; // ensure only one thread per subproc visits the abandoned os list
mi_segment_t* abandoned_os_list; // doubly-linked list of abandoned segments outside of arena's (in OS allocated memory)
mi_segment_t* abandoned_os_list_tail; // the tail-end of the list
mi_memid_t memid; // provenance of this memory block
};
#define MI_MAX_ARENAS (160) // Limited for now (and takes up .bss).. but arena's scale up exponentially (see `mi_arena_reserve`)
// 160 arenas is enough for ~2 TiB memory
typedef struct mi_subproc_s {
_Atomic(size_t) arena_count; // current count of arena's
_Atomic(mi_arena_t*) arenas[MI_MAX_ARENAS]; // arena's of this sub-process
mi_lock_t arena_reserve_lock; // lock to ensure arena's get reserved one at a time
_Atomic(int64_t) purge_expire; // expiration is set if any arenas can be purged
_Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // total count of abandoned pages for this sub-process
mi_page_t* os_abandoned_pages; // list of pages that OS allocated and not in an arena (only used if `mi_option_visit_abandoned` is on)
mi_lock_t os_abandoned_pages_lock; // lock for the os abandoned pages list (this lock protects list operations)
mi_memid_t memid; // provenance of this memory block (meta or OS)
mi_stats_t stats; // sub-process statistics (tld stats are merged in on thread termination)
} mi_subproc_t;
// ------------------------------------------------------
// Thread Local data
@ -659,34 +580,57 @@ struct mi_subproc_s {
// Milliseconds as in `int64_t` to avoid overflows
typedef int64_t mi_msecs_t;
// Queue of segments
typedef struct mi_segment_queue_s {
mi_segment_t* first;
mi_segment_t* last;
} mi_segment_queue_t;
// Segments thread local data
typedef struct mi_segments_tld_s {
mi_segment_queue_t small_free; // queue of segments with free small pages
mi_segment_queue_t medium_free; // queue of segments with free medium pages
mi_page_queue_t pages_purge; // queue of freed pages that are delay purged
size_t count; // current number of segments;
size_t peak_count; // peak number of segments
size_t current_size; // current size of all segments
size_t peak_size; // peak size of all segments
size_t reclaim_count;// number of reclaimed (abandoned) segments
mi_subproc_t* subproc; // sub-process this thread belongs to.
mi_stats_t* stats; // points to tld stats
} mi_segments_tld_t;
// Thread local data
struct mi_tld_s {
unsigned long long heartbeat; // monotonic heartbeat count
bool recurse; // true if deferred was called; used to prevent infinite recursion.
mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted)
mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates)
mi_segments_tld_t segments; // segment tld
mi_stats_t stats; // statistics
mi_threadid_t thread_id; // thread id of this thread
size_t thread_seq; // thread sequence id (linear count of created threads)
mi_subproc_t* subproc; // sub-process this thread belongs to.
mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted)
mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates)
unsigned long long heartbeat; // monotonic heartbeat count
bool recurse; // true if deferred was called; used to prevent infinite recursion.
bool is_in_threadpool; // true if this thread is part of a threadpool (and can run arbitrary tasks)
mi_stats_t stats; // statistics
mi_memid_t memid; // provenance of the tld memory itself (meta or OS)
};
/* -----------------------------------------------------------
Error codes passed to `_mi_fatal_error`
All are recoverable but EFAULT is a serious error and aborts by default in secure mode.
For portability define undefined error codes using common Unix codes:
<https://www-numi.fnal.gov/offline_software/srt_public_context/WebDocs/Errors/unix_system_errors.html>
----------------------------------------------------------- */
#ifndef EAGAIN // double free
#define EAGAIN (11)
#endif
#ifndef ENOMEM // out of memory
#define ENOMEM (12)
#endif
#ifndef EFAULT // corrupted free-list or meta-data
#define EFAULT (14)
#endif
#ifndef EINVAL // trying to free an invalid pointer
#define EINVAL (22)
#endif
#ifndef EOVERFLOW // count*size overflow
#define EOVERFLOW (75)
#endif
// ------------------------------------------------------
// Debug
// ------------------------------------------------------
#ifndef MI_DEBUG_UNINIT
#define MI_DEBUG_UNINIT (0xD0)
#endif
#ifndef MI_DEBUG_FREED
#define MI_DEBUG_FREED (0xDF)
#endif
#ifndef MI_DEBUG_PADDING
#define MI_DEBUG_PADDING (0xDE)
#endif
#endif // MI_TYPES_H

View file

@ -16,21 +16,22 @@ terms of the MIT license. A copy of the license can be found in the file
// ------------------------------------------------------
static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) {
// objects up to `MI_MAX_ALIGN_GUARANTEE` are allocated aligned to their size (see `segment.c:_mi_segment_page_start`).
// objects up to `MI_PAGE_MIN_BLOCK_ALIGN` are always allocated aligned to their size
mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0));
if (alignment > size) return false;
if (alignment <= MI_MAX_ALIGN_SIZE) return true;
const size_t bsize = mi_good_size(size);
return (bsize <= MI_MAX_ALIGN_GUARANTEE && (bsize & (alignment-1)) == 0);
const bool ok = (bsize <= MI_PAGE_MAX_START_BLOCK_ALIGN2 && _mi_is_power_of_two(bsize));
if (ok) { mi_assert_internal((bsize & (alignment-1)) == 0); } // since both power of 2 and alignment <= size
return ok;
}
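// [illustrative note, not part of this diff] the power-of-two reasoning above: if `bsize`
// is a power of two and `alignment <= bsize` is also a power of two, then `bsize` is a
// multiple of `alignment` and `(bsize & (alignment-1)) == 0` holds. For example a 256-byte
// block is naturally 128-byte aligned, whereas a 48-byte block gives no such guarantee.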
#if MI_GUARDED
static mi_decl_restrict void* mi_heap_malloc_guarded_aligned(mi_heap_t* heap, size_t size, size_t alignment, bool zero) mi_attr_noexcept {
// use over-allocation for guarded blocks
mi_assert_internal(alignment > 0 && alignment < MI_BLOCK_ALIGNMENT_MAX);
mi_assert_internal(alignment > 0 && alignment < MI_PAGE_MAX_OVERALLOC_ALIGN);
const size_t oversize = size + alignment - 1;
void* base = _mi_heap_malloc_guarded(heap, oversize, zero);
void* p = mi_align_up_ptr(base, alignment);
void* p = _mi_align_up_ptr(base, alignment);
mi_track_align(base, p, (uint8_t*)p - (uint8_t*)base, size);
mi_assert_internal(mi_usable_size(p) >= size);
mi_assert_internal(_mi_is_aligned(p, alignment));
@ -59,21 +60,20 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
void* p;
size_t oversize;
if mi_unlikely(alignment > MI_BLOCK_ALIGNMENT_MAX) {
// use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page)
// This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the
// first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down)
if mi_unlikely(alignment > MI_PAGE_MAX_OVERALLOC_ALIGN) {
// use OS allocation for large alignments and allocate inside a singleton page (not in an arena)
// This can support alignments >= MI_PAGE_ALIGN by ensuring the object can be aligned
// in the first (and single) page such that the page info is `MI_PAGE_ALIGN` bytes before it (and can be found in the _mi_page_map).
if mi_unlikely(offset != 0) {
// todo: cannot support offset alignment for very large alignments yet
#if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset);
#endif
#if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "aligned allocation with a large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset);
#endif
return NULL;
}
oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size);
// note: no guarded as alignment > 0
p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block
// zero afterwards as only the area from the aligned_p may be committed!
p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment); // the page block size should be large enough to align in the single huge page block
if (p == NULL) return NULL;
}
else {
@ -114,13 +114,13 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
#endif
// now zero the block if needed
if (alignment > MI_BLOCK_ALIGNMENT_MAX) {
// for the tracker, on huge aligned allocations only from the start of the large block is defined
mi_track_mem_undefined(aligned_p, size);
if (zero) {
_mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p));
}
}
//if (alignment > MI_PAGE_MAX_OVERALLOC_ALIGN) {
// // for the tracker, on huge aligned allocations only from the start of the large block is defined
// mi_track_mem_undefined(aligned_p, size);
// if (zero) {
// _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p));
// }
//}
if (p != aligned_p) {
mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p));
@ -177,12 +177,14 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
}
#if MI_GUARDED
if (offset==0 && alignment < MI_BLOCK_ALIGNMENT_MAX && mi_heap_malloc_use_guarded(heap,size)) {
if (offset==0 && alignment < MI_PAGE_MAX_OVERALLOC_ALIGN && mi_heap_malloc_use_guarded(heap,size)) {
return mi_heap_malloc_guarded_aligned(heap, size, alignment, zero);
}
#endif
// try first if there happens to be a small block available with just the right alignment
// since most small power-of-2 blocks (under MI_PAGE_MAX_START_BLOCK_ALIGN2) are already
// naturally aligned, this is often the case.
if mi_likely(size <= MI_SMALL_SIZE_MAX && alignment <= size) {
const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)`
const size_t padsize = size + MI_PADDING_SIZE;
@ -191,9 +193,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0;
if mi_likely(is_aligned)
{
#if MI_STAT>1
mi_heap_stat_increase(heap, malloc, size);
#endif
mi_debug_heap_stat_increase(heap, malloc, size);
void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen
mi_assert_internal(p != NULL);
mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);

View file

@ -30,7 +30,11 @@ terms of the MIT license. A copy of the license can be found in the file
// Note: in release mode the (inlined) routine is about 7 instructions with a single test.
extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept
{
mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size);
if (page->block_size != 0) { // not the empty heap
mi_assert_internal(mi_page_block_size(page) >= size);
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
}
// check the free list
mi_block_t* const block = page->free;
@ -82,7 +86,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_
#if (MI_STAT>0)
const size_t bsize = mi_page_usable_block_size(page);
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
if (bsize <= MI_LARGE_MAX_OBJ_SIZE) {
mi_heap_stat_increase(heap, normal, bsize);
mi_heap_stat_counter_increase(heap, normal_count, 1);
#if (MI_STAT>1)
@ -130,7 +134,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap,
mi_assert(size <= MI_SMALL_SIZE_MAX);
#if MI_DEBUG
const uintptr_t tid = _mi_thread_id();
mi_assert(heap->thread_id == 0 || heap->thread_id == tid); // heaps are thread local
mi_assert(heap->tld->thread_id == 0 || heap->tld->thread_id == tid); // heaps are thread local
#endif
#if (MI_PADDING || MI_GUARDED)
if (size == 0) { size = sizeof(void*); }
@ -184,7 +188,7 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z
else {
// regular allocation
mi_assert(heap!=NULL);
mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
mi_assert(heap->tld->thread_id == 0 || heap->tld->thread_id == _mi_thread_id()); // heaps are thread local
void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic
mi_track_malloc(p,size,zero);
@ -268,7 +272,7 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero)
// if p == NULL then behave as malloc.
// else if size == 0 then reallocate to a zero-sized block (and don't return NULL, just as mi_malloc(0)).
// (this means that returning NULL always indicates an error, and `p` will not have been freed in that case.)
const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL (with size 0)
const size_t size = (p==NULL ? 0 : _mi_usable_size(p,"mi_realloc"));
if mi_unlikely(newsize <= size && newsize >= (size / 2) && newsize > 0) { // note: newsize must be > 0 or otherwise we return NULL for realloc(NULL,0)
mi_assert_internal(p!=NULL);
// todo: do not track as the usable size is still the same in the free; adjust potential padding?
@ -615,7 +619,6 @@ static void* mi_block_ptr_set_guarded(mi_block_t* block, size_t obj_size) {
block->next = MI_BLOCK_TAG_GUARDED;
// set guard page at the end of the block
mi_segment_t* const segment = _mi_page_segment(page);
const size_t block_size = mi_page_block_size(page); // must use `block_size` to match `mi_free_local`
const size_t os_page_size = _mi_os_page_size();
mi_assert_internal(block_size >= obj_size + os_page_size + sizeof(mi_block_t));
@ -625,8 +628,11 @@ static void* mi_block_ptr_set_guarded(mi_block_t* block, size_t obj_size) {
return NULL;
}
uint8_t* guard_page = (uint8_t*)block + block_size - os_page_size;
// note: the alignment of the guard page relies on blocks being os_page_size aligned which
// is ensured in `mi_arena_page_alloc_fresh`.
mi_assert_internal(_mi_is_aligned(block, os_page_size));
mi_assert_internal(_mi_is_aligned(guard_page, os_page_size));
if (segment->allow_decommit && _mi_is_aligned(guard_page, os_page_size)) {
if (!page->memid.is_pinned && _mi_is_aligned(guard_page, os_page_size)) {
_mi_os_protect(guard_page, os_page_size);
}
else {
@ -636,9 +642,9 @@ static void* mi_block_ptr_set_guarded(mi_block_t* block, size_t obj_size) {
// align pointer just in front of the guard page
size_t offset = block_size - os_page_size - obj_size;
mi_assert_internal(offset > sizeof(mi_block_t));
if (offset > MI_BLOCK_ALIGNMENT_MAX) {
if (offset > MI_PAGE_MAX_OVERALLOC_ALIGN) {
// give up on placing it right in front of the guard page if the offset is too large for unalignment
offset = MI_BLOCK_ALIGNMENT_MAX;
offset = MI_PAGE_MAX_OVERALLOC_ALIGN;
}
void* p = (uint8_t*)block + offset;
mi_track_align(block, p, offset, obj_size);
@ -659,7 +665,7 @@ mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, boo
const size_t req_size = _mi_align_up(bsize + os_page_size, os_page_size);
mi_block_t* const block = (mi_block_t*)_mi_malloc_generic(heap, req_size, zero, 0 /* huge_alignment */);
if (block==NULL) return NULL;
void* const p = mi_block_ptr_set_guarded(block, obj_size);
void* const p = mi_block_ptr_set_guarded(block, obj_size);
// stats
mi_track_malloc(p, size, zero);
@@ -668,7 +674,7 @@ mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, boo
#if MI_STAT>1
mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
#endif
_mi_stat_counter_increase(&heap->tld->stats.guarded_alloc_count, 1);
mi_heap_stat_counter_increase(heap, guarded_alloc_count, 1);
}
#if MI_DEBUG>3
if (p != NULL && zero) {


@@ -1,346 +0,0 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#if !defined(MI_IN_ARENA_C)
#error "this file should be included from 'arena.c' (so mi_arena_t is visible)"
// add includes to help an IDE
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "bitmap.h"
#endif
// Minimal exports for arena-abandoned.
size_t mi_arena_id_index(mi_arena_id_t id);
mi_arena_t* mi_arena_from_index(size_t idx);
size_t mi_arena_get_count(void);
void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex);
bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index);
/* -----------------------------------------------------------
Abandoned blocks/segments:
_mi_arena_segment_clear_abandoned
_mi_arena_segment_mark_abandoned
This is used to atomically abandon/reclaim segments
(and crosses the arena API but it is convenient to have here).
Abandoned segments still have live blocks; they get reclaimed
when a thread frees a block in it, or when a thread needs a fresh
segment.
Abandoned segments are atomically marked in the `block_abandoned`
bitmap of arenas. Any segments allocated outside arenas are put
in the sub-process `abandoned_os_list`. This list is accessed
using locks but this should be uncommon and generally uncontended.
Reclaim and visiting either scan through the `block_abandoned`
bitmaps of the arenas, or visit the `abandoned_os_list`.
A potentially nicer design is to use arenas for everything,
and perhaps have virtual arenas to map OS-allocated memory,
but this would lack the "density" of our current arenas. TBC.
----------------------------------------------------------- */
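(For orientation, not part of the diff: a condensed sketch of how the cursor API in this now-removed file was driven; it mirrors `mi_abandoned_visit_blocks` near the end of the file, with `subproc` standing in for the caller's `mi_subproc_t*` and error handling omitted.)
  mi_arena_field_cursor_t cursor;
  _mi_arena_field_cursor_init(NULL, subproc, true /* visit all */, &cursor);
  mi_segment_t* segment;
  while ((segment = _mi_arena_segment_clear_abandoned_next(&cursor)) != NULL) {
    // ... visit or reclaim the segment's blocks ...
    _mi_arena_segment_mark_abandoned(segment);   // re-abandon when only visiting
  }
  _mi_arena_field_cursor_done(&cursor);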
// reclaim a specific OS abandoned segment; `true` on success.
// sets the thread_id.
static bool mi_arena_segment_os_clear_abandoned(mi_segment_t* segment, bool take_lock) {
mi_assert(segment->memid.memkind != MI_MEM_ARENA);
// not in an arena, remove from list of abandoned os segments
mi_subproc_t* const subproc = segment->subproc;
if (take_lock && !mi_lock_try_acquire(&subproc->abandoned_os_lock)) {
return false; // failed to acquire the lock, we just give up
}
// remove atomically from the abandoned os list (if possible!)
bool reclaimed = false;
mi_segment_t* const next = segment->abandoned_os_next;
mi_segment_t* const prev = segment->abandoned_os_prev;
if (next != NULL || prev != NULL || subproc->abandoned_os_list == segment) {
#if MI_DEBUG>3
// find ourselves in the abandoned list (and check the count)
bool found = false;
size_t count = 0;
for (mi_segment_t* current = subproc->abandoned_os_list; current != NULL; current = current->abandoned_os_next) {
if (current == segment) { found = true; }
count++;
}
mi_assert_internal(found);
mi_assert_internal(count == mi_atomic_load_relaxed(&subproc->abandoned_os_list_count));
#endif
// remove (atomically) from the list and reclaim
if (prev != NULL) { prev->abandoned_os_next = next; }
else { subproc->abandoned_os_list = next; }
if (next != NULL) { next->abandoned_os_prev = prev; }
else { subproc->abandoned_os_list_tail = prev; }
segment->abandoned_os_next = NULL;
segment->abandoned_os_prev = NULL;
mi_atomic_decrement_relaxed(&subproc->abandoned_count);
mi_atomic_decrement_relaxed(&subproc->abandoned_os_list_count);
if (take_lock) { // don't reset the thread_id when iterating
mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
}
reclaimed = true;
}
if (take_lock) { mi_lock_release(&segment->subproc->abandoned_os_lock); }
return reclaimed;
}
// reclaim a specific abandoned segment; `true` on success.
// sets the thread_id.
bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment) {
if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) {
return mi_arena_segment_os_clear_abandoned(segment, true /* take lock */);
}
// arena segment: use the blocks_abandoned bitmap.
size_t arena_idx;
size_t bitmap_idx;
mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx);
mi_arena_t* arena = mi_arena_from_index(arena_idx);
mi_assert_internal(arena != NULL);
// reclaim atomically
bool was_marked = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx);
if (was_marked) {
mi_assert_internal(mi_atomic_load_acquire(&segment->thread_id) == 0);
mi_atomic_decrement_relaxed(&segment->subproc->abandoned_count);
mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
}
// mi_assert_internal(was_marked);
mi_assert_internal(!was_marked || _mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
//mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
return was_marked;
}
// mark a specific OS segment as abandoned
static void mi_arena_segment_os_mark_abandoned(mi_segment_t* segment) {
mi_assert(segment->memid.memkind != MI_MEM_ARENA);
// not in an arena; we use a list of abandoned segments
mi_subproc_t* const subproc = segment->subproc;
mi_lock(&subproc->abandoned_os_lock) {
// push on the tail of the list (important for the visitor)
mi_segment_t* prev = subproc->abandoned_os_list_tail;
mi_assert_internal(prev == NULL || prev->abandoned_os_next == NULL);
mi_assert_internal(segment->abandoned_os_prev == NULL);
mi_assert_internal(segment->abandoned_os_next == NULL);
if (prev != NULL) { prev->abandoned_os_next = segment; }
else { subproc->abandoned_os_list = segment; }
subproc->abandoned_os_list_tail = segment;
segment->abandoned_os_prev = prev;
segment->abandoned_os_next = NULL;
mi_atomic_increment_relaxed(&subproc->abandoned_os_list_count);
mi_atomic_increment_relaxed(&subproc->abandoned_count);
// and release the lock
}
return;
}
// mark a specific segment as abandoned
// clears the thread_id.
void _mi_arena_segment_mark_abandoned(mi_segment_t* segment)
{
mi_assert_internal(segment->used == segment->abandoned);
mi_atomic_store_release(&segment->thread_id, (uintptr_t)0); // mark as abandoned for multi-thread free's
if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) {
mi_arena_segment_os_mark_abandoned(segment);
return;
}
// segment is in an arena, mark it in the arena `blocks_abandoned` bitmap
size_t arena_idx;
size_t bitmap_idx;
mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx);
mi_arena_t* arena = mi_arena_from_index(arena_idx);
mi_assert_internal(arena != NULL);
// set abandonment atomically
mi_subproc_t* const subproc = segment->subproc; // don't access the segment after setting it abandoned
const bool was_unmarked = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
if (was_unmarked) { mi_atomic_increment_relaxed(&subproc->abandoned_count); }
mi_assert_internal(was_unmarked);
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
}
/* -----------------------------------------------------------
Iterate through the abandoned blocks/segments using a cursor.
This is used for reclaiming and abandoned block visiting.
----------------------------------------------------------- */
// start a cursor at a randomized arena
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool visit_all, mi_arena_field_cursor_t* current) {
mi_assert_internal(heap == NULL || heap->tld->segments.subproc == subproc);
current->bitmap_idx = 0;
current->subproc = subproc;
current->visit_all = visit_all;
current->hold_visit_lock = false;
const size_t abandoned_count = mi_atomic_load_relaxed(&subproc->abandoned_count);
const size_t abandoned_list_count = mi_atomic_load_relaxed(&subproc->abandoned_os_list_count);
const size_t max_arena = mi_arena_get_count();
if (heap != NULL && heap->arena_id != _mi_arena_id_none()) {
// for a heap that is bound to one arena, only visit that arena
current->start = mi_arena_id_index(heap->arena_id);
current->end = current->start + 1;
current->os_list_count = 0;
}
else {
// otherwise visit all starting at a random location
if (abandoned_count > abandoned_list_count && max_arena > 0) {
current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena));
current->end = current->start + max_arena;
}
else {
current->start = 0;
current->end = 0;
}
current->os_list_count = abandoned_list_count; // max entries to visit in the os abandoned list
}
mi_assert_internal(current->start <= max_arena);
}
void _mi_arena_field_cursor_done(mi_arena_field_cursor_t* current) {
if (current->hold_visit_lock) {
mi_lock_release(&current->subproc->abandoned_os_visit_lock);
current->hold_visit_lock = false;
}
}
static mi_segment_t* mi_arena_segment_clear_abandoned_at(mi_arena_t* arena, mi_subproc_t* subproc, mi_bitmap_index_t bitmap_idx) {
// try to reclaim an abandoned segment in the arena atomically
if (!_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) return NULL;
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx);
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
// check that the segment belongs to our sub-process
// note: this is the reason we need the `abandoned_visit` lock in the case abandoned visiting is enabled.
// without the lock an abandoned visit may otherwise fail to visit all abandoned segments in the sub-process.
// for regular reclaim it is fine to miss one sometimes so without abandoned visiting we don't need the `abandoned_visit` lock.
if (segment->subproc != subproc) {
// it is from another sub-process, re-mark it and continue searching
const bool was_zero = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
mi_assert_internal(was_zero); MI_UNUSED(was_zero);
return NULL;
}
else {
// success, we unabandoned a segment in our sub-process
mi_atomic_decrement_relaxed(&subproc->abandoned_count);
return segment;
}
}
static mi_segment_t* mi_arena_segment_clear_abandoned_next_field(mi_arena_field_cursor_t* previous) {
const size_t max_arena = mi_arena_get_count();
size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx);
size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx);
// visit arena's (from the previous cursor)
for (; previous->start < previous->end; previous->start++, field_idx = 0, bit_idx = 0) {
// index wraps around
size_t arena_idx = (previous->start >= max_arena ? previous->start % max_arena : previous->start);
mi_arena_t* arena = mi_arena_from_index(arena_idx);
if (arena != NULL) {
bool has_lock = false;
// visit the abandoned fields (starting at previous_idx)
for (; field_idx < arena->field_count; field_idx++, bit_idx = 0) {
size_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]);
if mi_unlikely(field != 0) { // skip zero fields quickly
// we only take the arena lock if there are actually abandoned segments present
if (!has_lock && mi_option_is_enabled(mi_option_visit_abandoned)) {
has_lock = (previous->visit_all ? (mi_lock_acquire(&arena->abandoned_visit_lock),true) : mi_lock_try_acquire(&arena->abandoned_visit_lock));
if (!has_lock) {
if (previous->visit_all) {
_mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the visitor lock");
}
// skip to next arena
break;
}
}
mi_assert_internal(has_lock || !mi_option_is_enabled(mi_option_visit_abandoned));
// visit each set bit in the field (todo: maybe use `ctz` here?)
for (; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) {
// pre-check if the bit is set
size_t mask = ((size_t)1 << bit_idx);
if mi_unlikely((field & mask) == mask) {
mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx);
mi_segment_t* const segment = mi_arena_segment_clear_abandoned_at(arena, previous->subproc, bitmap_idx);
if (segment != NULL) {
//mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); }
previous->bitmap_idx = mi_bitmap_index_create_ex(field_idx, bit_idx + 1); // start at next one for the next iteration
return segment;
}
}
}
}
}
if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); }
}
}
return NULL;
}
static mi_segment_t* mi_arena_segment_clear_abandoned_next_list(mi_arena_field_cursor_t* previous) {
// go through the abandoned_os_list
// we only allow one thread per sub-process to do the visiting, guarded by the `abandoned_os_visit_lock`.
// The lock is released when the cursor is released.
if (!previous->hold_visit_lock) {
previous->hold_visit_lock = (previous->visit_all ? (mi_lock_acquire(&previous->subproc->abandoned_os_visit_lock),true)
: mi_lock_try_acquire(&previous->subproc->abandoned_os_visit_lock));
if (!previous->hold_visit_lock) {
if (previous->visit_all) {
_mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the OS visitor lock");
}
return NULL; // we cannot get the lock, give up
}
}
// One list entry at a time
while (previous->os_list_count > 0) {
previous->os_list_count--;
mi_lock_acquire(&previous->subproc->abandoned_os_lock); // this could contend with concurrent OS block abandonment and reclaim from `free`
mi_segment_t* segment = previous->subproc->abandoned_os_list;
// pop from head of the list, a subsequent mark will push at the end (and thus we iterate through os_list_count entries)
if (segment == NULL || mi_arena_segment_os_clear_abandoned(segment, false /* we already have the lock */)) {
mi_lock_release(&previous->subproc->abandoned_os_lock);
return segment;
}
// already abandoned, try again
mi_lock_release(&previous->subproc->abandoned_os_lock);
}
// done
mi_assert_internal(previous->os_list_count == 0);
return NULL;
}
// reclaim abandoned segments
// this does not set the thread id (so it appears as still abandoned)
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous) {
if (previous->start < previous->end) {
// walk the arena
mi_segment_t* segment = mi_arena_segment_clear_abandoned_next_field(previous);
if (segment != NULL) { return segment; }
}
// no entries in the arena's anymore, walk the abandoned OS list
mi_assert_internal(previous->start == previous->end);
return mi_arena_segment_clear_abandoned_next_list(previous);
}
bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
// (unfortunately) the visit_abandoned option must be enabled from the start.
// This is to avoid taking locks if abandoned list visiting is not required (as for most programs)
if (!mi_option_is_enabled(mi_option_visit_abandoned)) {
_mi_error_message(EFAULT, "internal error: can only visit abandoned blocks when MIMALLOC_VISIT_ABANDONED=ON");
return false;
}
mi_arena_field_cursor_t current;
_mi_arena_field_cursor_init(NULL, _mi_subproc_from_id(subproc_id), true /* visit all (blocking) */, &current);
mi_segment_t* segment;
bool ok = true;
while (ok && (segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL) {
ok = _mi_segment_visit_blocks(segment, heap_tag, visit_blocks, visitor, arg);
_mi_arena_segment_mark_abandoned(segment);
}
_mi_arena_field_cursor_done(&current);
return ok;
}

src/arena-meta.c Normal file (174 lines)

@@ -0,0 +1,174 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
We have a special "mini" allocator just for allocation of meta-data like
the heap (`mi_heap_t`) or thread-local data (`mi_tld_t`).
We reuse the bitmap of the arenas for allocation of 64b blocks inside
an arena slice (64KiB).
We always ensure that meta data is zero'd (we zero on `free`)
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "bitmap.h"
/* -----------------------------------------------------------
Meta data allocation
----------------------------------------------------------- */
#define MI_META_PAGE_SIZE MI_ARENA_SLICE_SIZE
#define MI_META_PAGE_ALIGN MI_ARENA_SLICE_ALIGN
#define MI_META_BLOCK_SIZE (128) // large enough such that META_MAX_SIZE > 4k (even on 32-bit)
#define MI_META_BLOCK_ALIGN MI_META_BLOCK_SIZE
#define MI_META_BLOCKS_PER_PAGE (MI_ARENA_SLICE_SIZE / MI_META_BLOCK_SIZE) // 1024
#define MI_META_MAX_SIZE (MI_BCHUNK_SIZE * MI_META_BLOCK_SIZE)
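(Illustration, not part of the diff: using the 64-bit value MI_BCHUNK_BITS = 512 quoted in the new bitmap header further down, MI_BCHUNK_SIZE = 512/8 = 64, so MI_META_MAX_SIZE = 64 * 128 bytes = 8 KiB, comfortably above the "> 4k" bound that the MI_META_BLOCK_SIZE comment aims for.)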
typedef struct mi_meta_page_s {
_Atomic(struct mi_meta_page_s*) next; // a linked list of meta-data pages (never released)
mi_memid_t memid; // provenance of the meta-page memory itself
mi_bbitmap_t blocks_free; // a small bitmap with 1 bit per block.
} mi_meta_page_t;
static mi_decl_cache_align _Atomic(mi_meta_page_t*) mi_meta_pages = MI_ATOMIC_VAR_INIT(NULL);
#if MI_DEBUG > 1
static mi_meta_page_t* mi_meta_page_of_ptr(void* p, size_t* block_idx) {
mi_meta_page_t* mpage = (mi_meta_page_t*)((uint8_t*)mi_align_down_ptr(p,MI_META_PAGE_ALIGN) + _mi_os_secure_guard_page_size());
if (block_idx != NULL) {
*block_idx = ((uint8_t*)p - (uint8_t*)mpage) / MI_META_BLOCK_SIZE;
}
return mpage;
}
#endif
static mi_meta_page_t* mi_meta_page_next( mi_meta_page_t* mpage ) {
return mi_atomic_load_ptr_acquire(mi_meta_page_t, &mpage->next);
}
static void* mi_meta_block_start( mi_meta_page_t* mpage, size_t block_idx ) {
mi_assert_internal(_mi_is_aligned((uint8_t*)mpage - _mi_os_secure_guard_page_size(), MI_META_PAGE_ALIGN));
mi_assert_internal(block_idx < MI_META_BLOCKS_PER_PAGE);
void* p = ((uint8_t*)mpage - _mi_os_secure_guard_page_size() + (block_idx * MI_META_BLOCK_SIZE));
mi_assert_internal(mpage == mi_meta_page_of_ptr(p,NULL));
return p;
}
// allocate a fresh meta page and add it to the global list.
static mi_meta_page_t* mi_meta_page_zalloc(void) {
// allocate a fresh arena slice
// note: careful with _mi_subproc as it may recurse into mi_tld and meta_page_zalloc again..
mi_memid_t memid;
uint8_t* base = (uint8_t*)_mi_arenas_alloc_aligned(_mi_subproc(), MI_META_PAGE_SIZE, MI_META_PAGE_ALIGN, 0,
true /* commit*/, (MI_SECURE==0) /* allow large? */,
NULL /* req arena */, 0 /* thread_seq */, &memid);
if (base == NULL) return NULL;
mi_assert_internal(_mi_is_aligned(base,MI_META_PAGE_ALIGN));
if (!memid.initially_zero) {
_mi_memzero_aligned(base, MI_ARENA_SLICE_SIZE);
}
// guard pages
#if MI_SECURE >= 1
_mi_os_secure_guard_page_set_at(base, memid.is_pinned);
_mi_os_secure_guard_page_set_before(base + MI_META_PAGE_SIZE, memid.is_pinned);
#endif
// initialize the page and free block bitmap
mi_meta_page_t* mpage = (mi_meta_page_t*)(base + _mi_os_secure_guard_page_size());
mpage->memid = memid;
mi_bbitmap_init(&mpage->blocks_free, MI_META_BLOCKS_PER_PAGE, true /* already_zero */);
const size_t mpage_size = offsetof(mi_meta_page_t,blocks_free) + mi_bbitmap_size(MI_META_BLOCKS_PER_PAGE, NULL);
const size_t info_blocks = _mi_divide_up(mpage_size,MI_META_BLOCK_SIZE);
const size_t guard_blocks = _mi_divide_up(_mi_os_secure_guard_page_size(), MI_META_BLOCK_SIZE);
mi_assert_internal(info_blocks + 2*guard_blocks < MI_META_BLOCKS_PER_PAGE);
mi_bbitmap_unsafe_setN(&mpage->blocks_free, info_blocks + guard_blocks, MI_META_BLOCKS_PER_PAGE - info_blocks - 2*guard_blocks);
// push atomically in front of the meta page list
// (note: there is no ABA issue since we never free meta-pages)
mi_meta_page_t* old = mi_atomic_load_ptr_acquire(mi_meta_page_t,&mi_meta_pages);
do {
mi_atomic_store_ptr_release(mi_meta_page_t, &mpage->next, old);
} while(!mi_atomic_cas_ptr_weak_acq_rel(mi_meta_page_t,&mi_meta_pages,&old,mpage));
return mpage;
}
// allocate meta-data
mi_decl_noinline void* _mi_meta_zalloc( size_t size, mi_memid_t* pmemid )
{
mi_assert_internal(pmemid != NULL);
size = _mi_align_up(size,MI_META_BLOCK_SIZE);
if (size == 0 || size > MI_META_MAX_SIZE) return NULL;
const size_t block_count = _mi_divide_up(size,MI_META_BLOCK_SIZE);
mi_assert_internal(block_count > 0 && block_count < MI_BCHUNK_BITS);
mi_meta_page_t* mpage0 = mi_atomic_load_ptr_acquire(mi_meta_page_t,&mi_meta_pages);
mi_meta_page_t* mpage = mpage0;
while (mpage != NULL) {
size_t block_idx;
if (mi_bbitmap_try_find_and_clearN(&mpage->blocks_free, block_count, 0, &block_idx)) {
// found and claimed `block_count` blocks
*pmemid = _mi_memid_create_meta(mpage, block_idx, block_count);
return mi_meta_block_start(mpage,block_idx);
}
else {
mpage = mi_meta_page_next(mpage);
}
}
// failed to find space in existing pages
if (mi_atomic_load_ptr_acquire(mi_meta_page_t,&mi_meta_pages) != mpage0) {
// the page list was updated by another thread in the meantime, retry
return _mi_meta_zalloc(size,pmemid);
}
// otherwise, allocate a fresh metapage and try once more
mpage = mi_meta_page_zalloc();
if (mpage != NULL) {
size_t block_idx;
if (mi_bbitmap_try_find_and_clearN(&mpage->blocks_free, block_count, 0, &block_idx)) {
// found and claimed `block_count` blocks
*pmemid = _mi_memid_create_meta(mpage, block_idx, block_count);
return mi_meta_block_start(mpage,block_idx);
}
}
// if all this failed, allocate from the OS
return _mi_os_alloc(size, pmemid);
}
// free meta-data
mi_decl_noinline void _mi_meta_free(void* p, size_t size, mi_memid_t memid) {
if (p==NULL) return;
if (memid.memkind == MI_MEM_META) {
mi_assert_internal(_mi_divide_up(size, MI_META_BLOCK_SIZE) == memid.mem.meta.block_count);
const size_t block_count = memid.mem.meta.block_count;
const size_t block_idx = memid.mem.meta.block_index;
mi_meta_page_t* mpage = (mi_meta_page_t*)memid.mem.meta.meta_page;
mi_assert_internal(mi_meta_page_of_ptr(p,NULL) == mpage);
mi_assert_internal(block_idx + block_count < MI_META_BLOCKS_PER_PAGE);
mi_assert_internal(mi_bbitmap_is_clearN(&mpage->blocks_free, block_idx, block_count));
// we zero on free (and on the initial page allocation) so we don't need a "dirty" map
_mi_memzero_aligned(mi_meta_block_start(mpage, block_idx), block_count*MI_META_BLOCK_SIZE);
mi_bbitmap_setN(&mpage->blocks_free, block_idx, block_count);
}
else {
_mi_arenas_free(p,size,memid);
}
}
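(Hypothetical usage sketch, not part of the diff: callers keep the returned `mi_memid_t` and hand it back on free so `_mi_meta_free` can tell whether the block came from a meta page or from the OS fallback; the 256-byte size is just an example.)
  mi_memid_t memid;
  void* p = _mi_meta_zalloc(256, &memid);   // zero-initialized, rounded up to 128-byte blocks
  if (p != NULL) {
    // ... use the meta-data block ...
    _mi_meta_free(p, 256, memid);           // zeroed again here so the blocks can be reused
  }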
// used for debug output
bool _mi_meta_is_meta_page(void* p)
{
mi_meta_page_t* mpage0 = mi_atomic_load_ptr_acquire(mi_meta_page_t, &mi_meta_pages);
mi_meta_page_t* mpage = mpage0;
while (mpage != NULL) {
if ((void*)mpage == p) return true;
mpage = mi_meta_page_next(mpage);
}
return false;
}

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -1,110 +1,317 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represented as an array of fields where each field is a machine word (`size_t`)
There are two api's; the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
(this is used in region allocation)
The `_across` postfixed functions do allow sequences that can cross over
between the fields. (This is used in arena allocation)
Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITMAP_H
#define MI_BITMAP_H
/* -----------------------------------------------------------
Bitmap definition
----------------------------------------------------------- */
/* --------------------------------------------------------------------------------
Atomic bitmaps with release/acquire guarantees:
#define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE)
#define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set
`mi_bfield_t`: is a single machine word that can efficiently be bit counted (usually `size_t`)
each bit usually represents a single MI_ARENA_SLICE_SIZE in an arena (64 KiB).
We need 16K bits to represent a 1GiB arena.
// An atomic bitmap of `size_t` fields
typedef _Atomic(size_t) mi_bitmap_field_t;
typedef mi_bitmap_field_t* mi_bitmap_t;
`mi_bchunk_t`: a chunk of bfield's of a total of MI_BCHUNK_BITS (= 512 on 64-bit, 256 on 32-bit)
allocations never span across chunks -- so MI_ARENA_MAX_OBJ_SIZE is the number
of bits in a chunk times the MI_ARENA_SLICE_SIZE (512 * 64KiB = 32 MiB).
These chunks are cache-aligned and we can use AVX2/AVX512/NEON/SVE/SVE2/etc. instructions
to scan for bits (perhaps) more efficiently.
// A bitmap index is the index of the bit in a bitmap.
typedef size_t mi_bitmap_index_t;
We allocate byte-sized ranges aligned to bytes in the bfield, and bfield-sized
ranges aligned to a bfield.
// Create a bit index.
static inline mi_bitmap_index_t mi_bitmap_index_create_ex(size_t idx, size_t bitidx) {
mi_assert_internal(bitidx <= MI_BITMAP_FIELD_BITS);
return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
}
static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
return mi_bitmap_index_create_ex(idx,bitidx);
}
Searching linearly through the chunks would be too slow (16K bits per GiB).
Instead we add a "chunkmap" to do a two-level search (more or less a btree of depth 2).
// Get the field index from a bit index.
static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx / MI_BITMAP_FIELD_BITS);
}
`mi_bchunkmap_t` (== `mi_bchunk_t`): for each chunk we track if it has (potentially) any bit set.
The chunkmap has 1 bit per chunk that is set if the chunk potentially has a bit set.
This is used to avoid scanning every chunk. (and thus strictly an optimization)
It is conservative: it is fine to set a bit in the chunk map even if the chunk turns out
to have no bits set. It is also allowed to briefly have a clear bit even if the
chunk has bits set -- as long as we guarantee that the bit will be set later on;
(this allows us to set the chunkmap bit right after we set a bit in the corresponding chunk).
// Get the bit index in a bitmap field
static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx % MI_BITMAP_FIELD_BITS);
}
However, when we clear a bit in a chunk, and the chunk is indeed all clear, we
cannot safely clear the bit corresponding to the chunk in the chunkmap since it
may race with another thread setting a bit in the same chunk. Therefore, when
clearing, we first test if a chunk is clear, then clear the chunkmap bit, and
then test again to catch any set bits that we may have missed.
// Get the full bit index
static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
return bitmap_idx;
}
Since the chunkmap may thus be briefly out-of-sync, this means that we may sometimes
not find a free page even though it's there (but we accept this as we avoid taking
full locks). (Another way to do this is to use an epoch but we like to avoid that complexity
for now).
/* -----------------------------------------------------------
Claim a bit sequence atomically
----------------------------------------------------------- */
`mi_bitmap_t`: a bitmap with N chunks. A bitmap has a chunkmap of MI_BCHUNK_BITS (512)
and thus has at most 512 chunks (=2^18 bits x 64 KiB slices = 16 GiB max arena size).
The minimum is 1 chunk which is a 32 MiB arena.
// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
For now, the implementation assumes MI_HAS_FAST_BITSCAN and uses trailing-zero-count
and pop-count (but we think it can be adapted to work reasonably well on older hardware too)
--------------------------------------------------------------------------------------------- */
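(Standalone illustration, not part of the diff: the two-level chunkmap scan described above, written against plain `uint64_t` words instead of the atomic `mi_bfield_t`/`mi_bchunk_t` types, and ignoring concurrency; `__builtin_ctzll` stands in for the trailing-zero-count the comment mentions.)
  #include <stdbool.h>
  #include <stddef.h>
  #include <stdint.h>
  #define CHUNK_WORDS 8   /* 8 x 64 bits = 512 bits per chunk, as on 64-bit */
  typedef struct { uint64_t bf[CHUNK_WORDS]; } chunk_t;
  /* `cmap` has one bit per chunk that may contain set bits; finds the lowest set bit overall */
  static bool find_set_bit(uint64_t cmap, const chunk_t* chunks, size_t* idx) {
    while (cmap != 0) {
      const unsigned ci = (unsigned)__builtin_ctzll(cmap);    /* level 1: pick a candidate chunk */
      for (unsigned i = 0; i < CHUNK_WORDS; i++) {            /* level 2: scan its 512 bits      */
        const uint64_t b = chunks[ci].bf[i];
        if (b != 0) { *idx = (size_t)ci*512 + i*64 + (unsigned)__builtin_ctzll(b); return true; }
      }
      cmap &= (cmap - 1);   /* the chunkmap is conservative: the chunk may be empty, keep looking */
    }
    return false;
  }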
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
// A word-size bit field.
typedef size_t mi_bfield_t;
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3)
#define MI_BFIELD_BITS (1 << MI_BFIELD_BITS_SHIFT)
#define MI_BFIELD_SIZE (MI_BFIELD_BITS/8)
#define MI_BFIELD_LO_BIT8 (((~(mi_bfield_t)0))/0xFF) // 0x01010101 ..
#define MI_BFIELD_HI_BIT8 (MI_BFIELD_LO_BIT8 << 7) // 0x80808080 ..
// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
// Returns `true` if successful when all previous `count` bits were 0.
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero);
bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
#define MI_BCHUNK_SIZE (MI_BCHUNK_BITS / 8)
#define MI_BCHUNK_FIELDS (MI_BCHUNK_BITS / MI_BFIELD_BITS) // 8 on both 64- and 32-bit
//--------------------------------------------------------------------------
// the `_across` functions work on bitmaps where sequences can cross over
// between the fields. This is used in arena allocation
//--------------------------------------------------------------------------
// A bitmap chunk contains 512 bits on 64-bit (256 on 32-bit)
typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bchunk_s {
_Atomic(mi_bfield_t) bfields[MI_BCHUNK_FIELDS];
} mi_bchunk_t;
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
// The chunkmap has one bit per corresponding chunk that is set if the chunk potentially has bits set.
// The chunkmap is itself a chunk.
typedef mi_bchunk_t mi_bchunkmap_t;
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
#define MI_BCHUNKMAP_BITS MI_BCHUNK_BITS
#define MI_BITMAP_MAX_CHUNK_COUNT (MI_BCHUNKMAP_BITS)
#define MI_BITMAP_MIN_CHUNK_COUNT (1)
#if MI_SIZE_BITS > 32
#define MI_BITMAP_DEFAULT_CHUNK_COUNT (64) // 2 GiB on 64-bit -- this is for the page map
#else
#define MI_BITMAP_DEFAULT_CHUNK_COUNT (1)
#endif
#define MI_BITMAP_MAX_BIT_COUNT (MI_BITMAP_MAX_CHUNK_COUNT * MI_BCHUNK_BITS) // 16 GiB arena
#define MI_BITMAP_MIN_BIT_COUNT (MI_BITMAP_MIN_CHUNK_COUNT * MI_BCHUNK_BITS) // 32 MiB arena
#define MI_BITMAP_DEFAULT_BIT_COUNT (MI_BITMAP_DEFAULT_CHUNK_COUNT * MI_BCHUNK_BITS) // 2 GiB arena
// An atomic bitmap
typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bitmap_s {
_Atomic(size_t) chunk_count; // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 1]; // suppress warning on msvc
mi_bchunkmap_t chunkmap;
mi_bchunk_t chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT]; // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT
} mi_bitmap_t;
static inline size_t mi_bitmap_chunk_count(const mi_bitmap_t* bitmap) {
return mi_atomic_load_relaxed(&((mi_bitmap_t*)bitmap)->chunk_count);
}
static inline size_t mi_bitmap_max_bits(const mi_bitmap_t* bitmap) {
return (mi_bitmap_chunk_count(bitmap) * MI_BCHUNK_BITS);
}
/* --------------------------------------------------------------------------------
Atomic bitmap operations
-------------------------------------------------------------------------------- */
// Many operations are generic over setting or clearing the bit sequence: we use `mi_xset_t` for this (true if setting, false if clearing)
typedef bool mi_xset_t;
#define MI_BIT_SET (true)
#define MI_BIT_CLEAR (false)
// Required size of a bitmap to represent `bit_count` bits.
size_t mi_bitmap_size(size_t bit_count, size_t* chunk_count);
// Initialize a bitmap to all clear; avoid a mem_zero if `already_zero` is true
// returns the size of the bitmap.
size_t mi_bitmap_init(mi_bitmap_t* bitmap, size_t bit_count, bool already_zero);
// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks).
// Not atomic so only use if still local to a thread.
void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n);
// Set a bit in the bitmap; returns `true` if it atomically transitioned from 0 to 1
bool mi_bitmap_set(mi_bitmap_t* bitmap, size_t idx);
// Clear a bit in the bitmap; returns `true` if it atomically transitioned from 1 to 0
bool mi_bitmap_clear(mi_bitmap_t* bitmap, size_t idx);
// Set a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's
// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)!
// If `already_set` is not NULL, it is set to the count of bits that were already set.
// (this is used for correct statistics when committing over a partially committed area)
bool mi_bitmap_setN(mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_set);
// Clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 1's to 0's
// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)!
bool mi_bitmap_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n);
// Is a sequence of n bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
// Is a sequence of n bits already set?
// (Used to check if a memory range is already committed)
static inline bool mi_bitmap_is_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
return mi_bitmap_is_xsetN(MI_BIT_SET, bitmap, idx, n);
}
// Is a sequence of n bits already clear?
static inline bool mi_bitmap_is_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
return mi_bitmap_is_xsetN(MI_BIT_CLEAR, bitmap, idx, n);
}
static inline bool mi_bitmap_is_set(mi_bitmap_t* bitmap, size_t idx) {
return mi_bitmap_is_setN(bitmap, idx, 1);
}
static inline bool mi_bitmap_is_clear(mi_bitmap_t* bitmap, size_t idx) {
return mi_bitmap_is_clearN(bitmap, idx, 1);
}
// Called once a bit is cleared to see if the memory slice can be claimed.
typedef bool (mi_claim_fun_t)(size_t slice_index, mi_arena_t* arena, mi_heaptag_t heap_tag, bool* keep_set);
// Find a set bit in the bitmap, atomically clear it, and check if `claim` returns true.
// If not claimed, continue on (potentially setting the bit again depending on `keep_set`).
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx,
mi_claim_fun_t* claim, mi_arena_t* arena, mi_heaptag_t heap_tag );
// Atomically clear a bit but only if it is set. Will block otherwise until the bit is set.
// This is used to delay freeing a page that is at the same time being considered to be
// allocated from `mi_arena_try_abandoned` (and is in the `claim` function of `mi_bitmap_try_find_and_claim`).
void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx);
// If a bit is set in the bitmap, return `true` and set `idx` to the index of the highest bit.
// Otherwise return `false` (and `*idx` is undefined).
// Used for unloading arenas
bool mi_bitmap_bsr(mi_bitmap_t* bitmap, size_t* idx);
typedef bool (mi_forall_set_fun_t)(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg2);
// Visit all set bits in a bitmap (`slice_count == 1`)
bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);
// Visit all set bits in a bitmap with larger ranges if possible (`slice_count >= 1`)
bool _mi_bitmap_forall_setc_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);
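(Hypothetical usage sketch, not part of the diff: exercising the core interface above on a one-chunk bitmap; the `calloc` backing store is only for illustration since inside mimalloc the bitmap lives in the arena meta-data.)
  size_t chunk_count;
  const size_t bytes = mi_bitmap_size(MI_BCHUNK_BITS, &chunk_count);   // bytes needed for 512 bits
  mi_bitmap_t* bm = (mi_bitmap_t*)calloc(1, bytes);                    // zeroed backing memory
  mi_bitmap_init(bm, MI_BCHUNK_BITS, true /* already_zero */);
  mi_bitmap_setN(bm, 0, 8, NULL);                    // mark slices 0..7 (may not cross a chunk)
  const bool all_set = mi_bitmap_is_setN(bm, 0, 8);  // -> true, e.g. "range is committed"
  mi_bitmap_clearN(bm, 0, 8);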
/* ----------------------------------------------------------------------------
Binned concurrent bitmap
Assigns a size class to each chunk such that small blocks don't cause too
much fragmentation since we keep chunks for larger blocks separate.
---------------------------------------------------------------------------- */
// Size bins; larger bins are allowed to go into smaller bins.
// SMALL can only be in small (and NONE), so they cannot fragment the larger bins.
typedef enum mi_bbin_e {
MI_BBIN_NONE, // no bin assigned yet (the chunk is completely free)
MI_BBIN_SMALL, // slice_count == 1
MI_BBIN_OTHER, // slice_count: any count not covered by the other bins, with 1 <= slice_count <= MI_BCHUNK_BITS
MI_BBIN_MEDIUM, // slice_count == 8
MI_BBIN_LARGE, // slice_count == MI_BFIELD_BITS -- only used if MI_ENABLE_LARGE_PAGES is 1
MI_BBIN_COUNT
} mi_bbin_t;
static inline mi_bbin_t mi_bbin_inc(mi_bbin_t bbin) {
return (mi_bbin_t)((int)bbin + 1);
}
static inline mi_bbin_t mi_bbin_of(size_t slice_count) {
if (slice_count==1) return MI_BBIN_SMALL;
if (slice_count==8) return MI_BBIN_MEDIUM;
#if MI_ENABLE_LARGE_PAGES
if (slice_count==MI_BFIELD_BITS) return MI_BBIN_LARGE;
#endif
return MI_BBIN_OTHER;
}
// An atomic "binned" bitmap for the free slices where we keep chunks reserved for particalar size classes
typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bbitmap_s {
_Atomic(size_t) chunk_count; // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
_Atomic(size_t) chunk_max_accessed; // max chunk index that was once cleared or set
size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 2]; // suppress warning on msvc
mi_bchunkmap_t chunkmap;
_Atomic(uint8_t) chunk_bins[MI_BITMAP_MAX_CHUNK_COUNT]; // 512b
mi_bchunk_t chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT]; // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT
} mi_bbitmap_t;
static inline size_t mi_bbitmap_chunk_count(const mi_bbitmap_t* bbitmap) {
return mi_atomic_load_relaxed(&((mi_bbitmap_t*)bbitmap)->chunk_count);
}
static inline size_t mi_bbitmap_max_bits(const mi_bbitmap_t* bbitmap) {
return (mi_bbitmap_chunk_count(bbitmap) * MI_BCHUNK_BITS);
}
size_t mi_bbitmap_size(size_t bit_count, size_t* chunk_count);
// Initialize a bitmap to all clear; avoid a mem_zero if `already_zero` is true
// returns the size of the bitmap.
size_t mi_bbitmap_init(mi_bbitmap_t* bbitmap, size_t bit_count, bool already_zero);
// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks).
// Not atomic so only use if still local to a thread.
void mi_bbitmap_unsafe_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n);
// Set a sequence of `n` bits in the bbitmap; returns `true` if atomically transitioned from all 0's to 1's
// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)!
bool mi_bbitmap_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n);
// Clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 1's to 0's
// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)!
bool mi_bbitmap_clearN(mi_bbitmap_t* bbitmap, size_t idx, size_t n);
// Is a sequence of n bits already all set/cleared?
bool mi_bbitmap_is_xsetN(mi_xset_t set, mi_bbitmap_t* bbitmap, size_t idx, size_t n);
// Is a sequence of n bits already set?
// (Used to check if a memory range is already committed)
static inline bool mi_bbitmap_is_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n) {
return mi_bbitmap_is_xsetN(MI_BIT_SET, bbitmap, idx, n);
}
// Is a sequence of n bits already clear?
static inline bool mi_bbitmap_is_clearN(mi_bbitmap_t* bbitmap, size_t idx, size_t n) {
return mi_bbitmap_is_xsetN(MI_BIT_CLEAR, bbitmap, idx, n);
}
// Try to atomically transition `n` bits from all set to all clear. Returns `true` on success.
// `n` cannot cross chunk boundaries, where `n <= MI_BCHUNK_BITS`.
bool mi_bbitmap_try_clearN(mi_bbitmap_t* bbitmap, size_t idx, size_t n);
// Specialized versions for common bit sequence sizes
bool mi_bbitmap_try_find_and_clear(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // 1-bit
bool mi_bbitmap_try_find_and_clear8(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // 8-bits
// bool mi_bbitmap_try_find_and_clearX(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // MI_BFIELD_BITS
bool mi_bbitmap_try_find_and_clearNX(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx); // < MI_BFIELD_BITS
bool mi_bbitmap_try_find_and_clearN_(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx); // > MI_BFIELD_BITS <= MI_BCHUNK_BITS
// Find a sequence of `n` bits in the bbitmap with all bits set, and try to atomically clear all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
mi_decl_nodiscard static inline bool mi_bbitmap_try_find_and_clearN(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx) {
if (n==1) return mi_bbitmap_try_find_and_clear(bbitmap, tseq, pidx); // small pages
if (n==8) return mi_bbitmap_try_find_and_clear8(bbitmap, tseq, pidx); // medium pages
// if (n==MI_BFIELD_BITS) return mi_bbitmap_try_find_and_clearX(bbitmap, tseq, pidx); // large pages
if (n==0 || n>MI_BCHUNK_BITS) return false; // cannot be more than a chunk
if (n<=MI_BFIELD_BITS) return mi_bbitmap_try_find_and_clearNX(bbitmap, tseq, n, pidx);
return mi_bbitmap_try_find_and_clearN_(bbitmap, tseq, n, pidx);
}
#endif // MI_BITMAP_H


@@ -23,9 +23,6 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block);
// Free
// ------------------------------------------------------
// forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block);
// regular free of a (thread local) block pointer
// fast path written carefully to prevent spilling on the stack
static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool track_stats, bool check_full)
@@ -50,6 +47,40 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
}
}
// Forward declaration for multi-threaded collect
static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) mi_attr_noexcept;
// Free a block multi-threaded
static inline void mi_free_block_mt(mi_page_t* page, mi_block_t* block) mi_attr_noexcept
{
// adjust stats (after padding check and potentially recursive `mi_free` above)
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free_size(block, mi_page_usable_size_of(page, block));
// _mi_padding_shrink(page, block, sizeof(mi_block_t));
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
size_t dbgsize = mi_usable_size(block);
if (dbgsize > MI_MiB) { dbgsize = MI_MiB; }
_mi_memset_aligned(block, MI_DEBUG_FREED, dbgsize);
#endif
// push atomically on the page thread free list
mi_thread_free_t tf_new;
mi_thread_free_t tf_old = mi_atomic_load_relaxed(&page->xthread_free);
do {
mi_block_set_next(page, block, mi_tf_block(tf_old));
tf_new = mi_tf_create(block, true /* always owned: try to claim it if abandoned */);
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new)); // todo: release is enough?
// and atomically try to collect the page if it was abandoned
const bool is_owned_now = !mi_tf_is_owned(tf_old);
if (is_owned_now) {
mi_assert_internal(mi_page_is_abandoned(page));
mi_free_try_collect_mt(page);
}
}
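(Standalone illustration, not part of the diff: the compare-and-swap push used above, reduced to C11 atomics on a plain tagged pointer where bit 0 plays the role of the "owned" flag; it assumes nodes are at least 2-byte aligned so that bit is free.)
  #include <stdatomic.h>
  #include <stdint.h>
  typedef struct node_s { struct node_s* next; } node_t;
  /* push `n` and set the owned bit; returns true if the list was previously un-owned (abandoned) */
  static bool push_and_claim(_Atomic(uintptr_t)* list, node_t* n) {
    uintptr_t old = atomic_load_explicit(list, memory_order_relaxed);
    uintptr_t desired;
    do {
      n->next = (node_t*)(old & ~(uintptr_t)1);   /* link to the current head (tag stripped) */
      desired = (uintptr_t)n | 1;                 /* new head with the owned bit set         */
    } while (!atomic_compare_exchange_weak_explicit(list, &old, desired,
                                                    memory_order_acq_rel, memory_order_relaxed));
    return (old & 1) == 0;   /* like `is_owned_now` above: we just claimed an abandoned page */
  }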
// Adjust a block that was allocated aligned, to the actual start of the block in the page.
// note: this can be called from `mi_free_generic_mt` where a non-owning thread accesses the
// `page_start` and `block_size` fields; however these are constant and the page won't be
@@ -57,7 +88,7 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) {
mi_assert_internal(page!=NULL && p!=NULL);
size_t diff = (uint8_t*)p - page->page_start;
size_t diff = (uint8_t*)p - mi_page_start(page);
size_t adjust;
if mi_likely(page->block_size_shift != 0) {
adjust = diff & (((size_t)1 << page->block_size_shift) - 1);
@@ -81,218 +112,153 @@ static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, vo
}
#endif
// free a local pointer (page parameter comes first for better codegen)
static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
MI_UNUSED(segment);
static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, void* p) mi_attr_noexcept {
mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(page, p) : (mi_block_t*)p);
mi_block_check_unguard(page, block, p);
mi_free_block_local(page, block, true /* track stats */, true /* check for a full page */);
}
// free a pointer owned by another thread (page parameter comes first for better codegen)
static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, void* p) mi_attr_noexcept {
if (p==NULL) return; // a NULL pointer is seen as abandoned (tid==0) with a full flag set
mi_block_t* const block = _mi_page_ptr_unalign(page, p); // don't check `has_aligned` flag to avoid a race (issue #865)
mi_block_check_unguard(page, block, p);
mi_free_block_mt(page, segment, block);
mi_free_block_mt(page, block);
}
// generic free (for runtime integration)
void mi_decl_noinline _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
if (is_local) mi_free_generic_local(page,segment,p);
else mi_free_generic_mt(page,segment,p);
void mi_decl_noinline _mi_free_generic(mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
if (is_local) mi_free_generic_local(page,p);
else mi_free_generic_mt(page,p);
}
// Get the segment data belonging to a pointer
// This is just a single `and` in release mode but does further checks in debug mode
// (and secure mode) to see if this was a valid pointer.
static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
static inline mi_page_t* mi_checked_ptr_page(const void* p, const char* msg)
{
MI_UNUSED(msg);
#if (MI_DEBUG>0)
MI_UNUSED_RELEASE(msg);
#if MI_DEBUG
if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0 && !mi_option_is_enabled(mi_option_guarded_precise)) {
_mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
return NULL;
}
#endif
mi_segment_t* const segment = _mi_ptr_segment(p);
if mi_unlikely(segment==NULL) return segment;
#if (MI_DEBUG>0)
if mi_unlikely(!mi_is_in_heap_region(p)) {
_mi_warning_message("%s: pointer might not point to a valid heap region: %p\n"
"(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) {
_mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
}
mi_page_t* const page = _mi_safe_ptr_page(p);
if (page == NULL && p != NULL) {
_mi_error_message(EINVAL, "%s: invalid pointer: %p\n", msg, p);
}
return page;
#else
return _mi_ptr_page(p);
#endif
#if (MI_DEBUG>0 || MI_SECURE>=4)
if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) {
_mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
return NULL;
}
#endif
return segment;
}
// Free a block
// Fast path written carefully to prevent register spilling on the stack
void mi_free(void* p) mi_attr_noexcept
{
mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
if mi_unlikely(segment==NULL) return;
mi_page_t* const page = mi_checked_ptr_page(p,"mi_free");
const bool is_local = (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
mi_page_t* const page = _mi_segment_page_of(segment, p);
#if MI_PAGE_MAP_FLAT // if not flat, NULL will point to `_mi_page_empty` and get to `mi_free_generic_mt`
if mi_unlikely(page==NULL) return;
#endif
if mi_likely(is_local) { // thread-local free?
if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
// thread-local, aligned, and not a full page
mi_block_t* const block = (mi_block_t*)p;
mi_free_block_local(page, block, true /* track stats */, false /* no need to check if the page is full */);
}
else {
// page is full or contains (inner) aligned blocks; use generic path
mi_free_generic_local(page, segment, p);
}
const mi_threadid_t xtid = (_mi_prim_thread_id() ^ mi_page_xthread_id(page));
if mi_likely(xtid == 0) { // thread-local free? `tid==mi_page_thread_id(page) && mi_page_flags(page)==0`
// thread-local, aligned, and not a full page
mi_block_t* const block = (mi_block_t*)p;
mi_free_block_local(page, block, true /* track stats */, false /* no need to check if the page is full */);
}
else if (xtid <= MI_PAGE_FLAG_MASK) { // `tid == mi_page_thread_id(page) && mi_page_flags(page)!=0`
// page is local, but is full or contains (inner) aligned blocks; use generic path
mi_free_generic_local(page, p);
}
// free-ing in a page owned by a heap in another thread, or on abandoned page (not belonging to a heap)
else if ((xtid & MI_PAGE_FLAG_MASK) == 0) { // `tid!=mi_page_thread_id(page) && mi_page_flags(page)==0`
// blocks are aligned (and not a full page)
mi_block_t* const block = (mi_block_t*)p;
mi_free_block_mt(page,block);
}
else {
// not thread-local; use generic path
mi_free_generic_mt(page, segment, p);
}
// page is full or contains (inner) aligned blocks; use generic multi-thread path
mi_free_generic_mt(page, p);
}
}
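(Standalone illustration, not part of the diff: the xor-based dispatch in `mi_free` above, assuming, as the combined `xthread_id` requires, that thread ids always have the low flag bits clear so the page flags can be packed into the same word; `FLAG_MASK` is a stand-in for `MI_PAGE_FLAG_MASK`.)
  #include <stdint.h>
  #define FLAG_MASK ((uintptr_t)3)   /* e.g. the "full" and "has-aligned" flag bits */
  typedef enum { FREE_LOCAL_FAST, FREE_LOCAL_GENERIC, FREE_MT_FAST, FREE_MT_GENERIC } free_path_t;
  static free_path_t dispatch(uintptr_t my_tid, uintptr_t page_xthread_id) {
    const uintptr_t xtid = my_tid ^ page_xthread_id;
    if (xtid == 0)               return FREE_LOCAL_FAST;     /* same thread, no flags set  */
    if (xtid <= FLAG_MASK)       return FREE_LOCAL_GENERIC;  /* same thread, some flag set */
    if ((xtid & FLAG_MASK) == 0) return FREE_MT_FAST;        /* other thread, no flags set */
    return FREE_MT_GENERIC;                                  /* other thread, flagged page */
  }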
// return true if successful
bool _mi_free_delayed_block(mi_block_t* block) {
// get segment and page
mi_assert_internal(block!=NULL);
const mi_segment_t* const segment = _mi_ptr_segment(block);
mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(_mi_thread_id() == segment->thread_id);
mi_page_t* const page = _mi_segment_page_of(segment, block);
// Clear the no-delayed flag so delayed freeing is used again for this page.
// This must be done before collecting the free lists on this page -- otherwise
// some blocks may end up in the page `thread_free` list with no blocks in the
// heap `thread_delayed_free` list which may cause the page to be never freed!
// (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`)
if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) {
return false;
}
// collect all other non-local frees (move from `thread_free` to `free`) to ensure up-to-date `used` count
_mi_page_free_collect(page, false);
// and free the block (possibly freeing the page as well since `used` is updated)
mi_free_block_local(page, block, false /* stats have already been adjusted */, true /* check for a full page */);
return true;
}
// ------------------------------------------------------
// Multi-threaded Free (`_mt`)
// ------------------------------------------------------
// Push a block that is owned by another thread on its page-local thread free
// list or its heap delayed free list. Such blocks are later collected by
// the owning thread in `_mi_free_delayed_block`.
static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block_t* block )
{
// Try to put the block on either the page-local thread free list,
// or the heap delayed free list (if this is the first non-local free in that page)
mi_thread_free_t tfreex;
bool use_delayed;
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE);
if mi_unlikely(use_delayed) {
// unlikely: this only happens on the first concurrent free in a page that is in the full list
tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING);
}
else {
// usual: directly add to page thread_free list
mi_block_set_next(page, block, mi_tf_block(tfree));
tfreex = mi_tf_set_block(tfree,block);
}
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
// If this was the first non-local free, we need to push it on the heap delayed free list instead
if mi_unlikely(use_delayed) {
// racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page);
mi_assert_internal(heap != NULL);
if (heap != NULL) {
// add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
do {
mi_block_set_nextx(heap,block,dfree, heap->keys);
} while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
}
static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) mi_attr_noexcept {
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_is_abandoned(page));
// and reset the MI_DELAYED_FREEING flag
tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
tfreex = tfree;
mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
}
}
// we own the page now..
// safe to collect the thread atomic free list
_mi_page_free_collect(page, false); // update `used` count
#if MI_DEBUG > 1
if (mi_page_is_singleton(page)) { mi_assert_internal(mi_page_all_free(page)); }
#endif
// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block)
{
// first see if the segment was abandoned and if we can reclaim it into our thread
if (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 &&
#if MI_HUGE_PAGE_ABANDON
segment->page_kind != MI_PAGE_HUGE &&
#endif
mi_atomic_load_relaxed(&segment->thread_id) == 0 && // segment is abandoned?
mi_prim_get_default_heap() != (mi_heap_t*)&_mi_heap_empty) // and we did not already exit this thread (without this check, a fresh heap will be initialized (issue #944))
// 1. free if the page is free now
if (mi_page_all_free(page))
{
// the segment is abandoned, try to reclaim it into our heap
if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) {
mi_assert_internal(_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
mi_assert_internal(mi_heap_get_default()->tld->segments.subproc == segment->subproc);
mi_free(block); // recursively free as now it will be a local free in our heap
return;
// first remove it from the abandoned pages in the arena (if mapped, this waits for any readers to finish)
_mi_arenas_page_unabandon(page);
// we can free the page directly
_mi_arenas_page_free(page);
return;
}
// 2. if the page is not too full, we can try to reclaim it for ourselves
// note: this seems a bad idea but it speeds up some benchmarks (like `larson`) quite a bit.
if (_mi_option_get_fast(mi_option_reclaim_on_free) != 0 &&
!mi_page_is_used_at_frac(page,8)
// && !mi_page_is_abandoned_mapped(page)
)
{
// the page has still some blocks in use (but not too many)
// reclaim in our heap if compatible, or otherwise abandon again
// todo: optimize this check further?
// note: don't use `mi_heap_get_default()` as we may just have terminated this thread and we should
// not reinitialize the heap for this thread. (can happen due to thread-local destructors for example -- issue #944)
mi_heap_t* const heap = mi_prim_get_default_heap();
if (heap != (mi_heap_t*)&_mi_heap_empty) // we did not already terminate our thread (can this happen?)
{
mi_heap_t* const tagheap = _mi_heap_by_tag(heap, page->heap_tag);
if ((tagheap != NULL) && // don't reclaim across heap object types
(tagheap->allow_page_reclaim) && // we are allowed to reclaim abandoned pages
// (page->subproc == tagheap->tld->subproc) && // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? )
(_mi_arena_memid_is_suitable(page->memid, tagheap->exclusive_arena)) // don't reclaim across unsuitable arenas; todo: inline arena_is_suitable (?)
)
{
if (mi_page_queue(tagheap, page->block_size)->first != NULL) { // don't reclaim for a block_size we don't use
// first remove it from the abandoned pages in the arena -- this waits for any readers to finish
_mi_arenas_page_unabandon(page);
_mi_heap_page_reclaim(tagheap, page);
mi_heap_stat_counter_increase(tagheap, pages_reclaim_on_free, 1);
return;
}
}
}
}
// The padding check may access the non-thread-owned page for the key values.
// That is safe as these are constant and the page won't be freed (as the block is not freed yet).
mi_check_padding(page, block);
// adjust stats (after padding check and potentially recursive `mi_free` above)
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free_size(block, mi_page_usable_size_of(page,block));
// for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
_mi_padding_shrink(page, block, sizeof(mi_block_t));
if (segment->page_kind == MI_PAGE_HUGE) {
#if MI_HUGE_PAGE_ABANDON
// huge page segments are always abandoned and can be freed immediately
_mi_segment_huge_page_free(segment, page, block);
// 3. if the page is unmapped, try to reabandon so it can possibly be mapped and found for allocations
if (!mi_page_is_used_at_frac(page,8) && // only reabandon once the page has enough free blocks again, to prevent immediately re-abandoning a (nearly) full page
!mi_page_is_abandoned_mapped(page) && page->memid.memkind == MI_MEM_ARENA &&
_mi_arenas_page_try_reabandon_to_mapped(page))
{
return;
#else
// huge pages are special as they occupy the entire segment
// as these are large we reset the memory occupied by the page so it is available to other threads
// (as the owning thread needs to actually free the memory later).
_mi_segment_huge_page_reset(segment, page, block);
#endif
}
else {
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
memset(block, MI_DEBUG_FREED, mi_usable_size(block));
#endif
}
// and finally free the actual block by pushing it on the owning heap
// thread_delayed free list (or heap delayed free list)
mi_free_block_delayed_mt(page,block);
// not reclaimed or free'd, unown again
_mi_page_unown(page);
}
@ -316,9 +282,8 @@ static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* p
}
static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
if mi_unlikely(segment==NULL) return 0;
const mi_page_t* const page = _mi_segment_page_of(segment, p);
const mi_page_t* const page = mi_checked_ptr_page(p,msg);
if mi_unlikely(page==NULL) return 0;
if mi_likely(!mi_page_has_aligned(page)) {
const mi_block_t* block = (const mi_block_t*)p;
return mi_page_usable_size_of(page, block);
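With the page map in place, `_mi_usable_size` no longer goes through the segment but looks the page up directly from the pointer. From the application side the behavior is unchanged; a small sketch of the public API (assuming mimalloc is linked and `<mimalloc.h>` is on the include path). The reported size may exceed the requested size because requests are rounded up to the block size of their size class:
#include <mimalloc.h>
#include <stdio.h>
int main(void) {
  void* p = mi_malloc(100);
  if (p != NULL) {
    // usable size is at least 100, typically the full block size of the size class
    printf("requested 100 bytes, usable %zu bytes\n", mi_usable_size(p));
    mi_free(p);
  }
  return 0;
}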
@ -513,21 +478,21 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
// only maintain stats for smaller objects if requested
#if (MI_STAT>0)
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
#if (MI_STAT < 2)
void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
#if (MI_STAT < 2)
MI_UNUSED(block);
#endif
#endif
mi_heap_t* const heap = mi_heap_get_default();
const size_t bsize = mi_page_usable_block_size(page);
#if (MI_STAT>1)
#if (MI_STAT>1)
const size_t usize = mi_page_usable_size_of(page, block);
mi_heap_stat_decrease(heap, malloc, usize);
#endif
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
#endif
if (bsize <= MI_LARGE_MAX_OBJ_SIZE) {
mi_heap_stat_decrease(heap, normal, bsize);
#if (MI_STAT > 1)
#if (MI_STAT > 1)
mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1);
#endif
#endif
}
else {
const size_t bpsize = mi_page_block_size(page); // match stat in page.c:mi_huge_page_alloc
@ -535,7 +500,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
}
}
#else
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(page); MI_UNUSED(block);
}
#endif
@ -553,7 +518,7 @@ static void mi_block_unguard(mi_page_t* page, mi_block_t* block, void* p) {
const size_t bsize = mi_page_block_size(page);
const size_t psize = _mi_os_page_size();
mi_assert_internal(bsize > psize);
mi_assert_internal(_mi_page_segment(page)->allow_decommit);
mi_assert_internal(!page->memid.is_pinned);
void* gpage = (uint8_t*)block + bsize - psize;
mi_assert_internal(_mi_is_aligned(gpage, psize));
_mi_os_unprotect(gpage, psize);

View file

@ -7,11 +7,8 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h" // mi_prim_get_default_heap
#include <string.h> // memset, memcpy
#if defined(_MSC_VER) && (_MSC_VER < 1920)
#pragma warning(disable:4204) // non-constant aggregate initializer
#endif
@ -58,8 +55,6 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
MI_UNUSED(arg2);
MI_UNUSED(pq);
mi_assert_internal(mi_page_heap(page) == heap);
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == heap->thread_id);
mi_assert_expensive(_mi_page_is_valid(page));
return true;
}
@ -98,7 +93,7 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
if (mi_page_all_free(page)) {
// no more used blocks, free the page.
// note: this will free retired pages as well.
_mi_page_free(page, pq, collect >= MI_FORCE);
_mi_page_free(page, pq);
}
else if (collect == MI_ABANDON) {
// still used blocks but the thread is done; abandon the page
@ -107,14 +102,6 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
return true; // don't break
}
static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
MI_UNUSED(arg1);
MI_UNUSED(arg2);
MI_UNUSED(heap);
MI_UNUSED(pq);
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
return true; // don't break
}
static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
{
@ -124,49 +111,19 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
_mi_deferred_free(heap, force);
// python/cpython#112532: we may be called from a thread that is not the owner of the heap
const bool is_main_thread = (_mi_is_main_thread() && heap->thread_id == _mi_thread_id());
// note: never reclaim on collect but leave it to threads that need storage to reclaim
if (
#ifdef NDEBUG
collect == MI_FORCE
#else
collect >= MI_FORCE
#endif
&& is_main_thread && mi_heap_is_backing(heap) && !heap->no_reclaim)
{
// the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
// if all memory is freed by now, all segments should be freed.
// note: this only collects in the current subprocess
_mi_abandoned_reclaim_all(heap, &heap->tld->segments);
}
// if abandoning, mark all pages to no longer add to delayed_free
if (collect == MI_ABANDON) {
mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
}
// free all current thread delayed blocks.
// (if abandoning, after this there are no more thread-delayed references into the pages.)
_mi_heap_delayed_free_all(heap);
// const bool is_main_thread = (_mi_is_main_thread() && heap->thread_id == _mi_thread_id());
// collect retired pages
_mi_heap_collect_retired(heap, force);
// if (_mi_is_main_thread()) { mi_debug_show_arenas(true, false, false); }
// collect all pages owned by this thread
mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );
// collect segments (purge pages, this can be expensive so don't force on abandonment)
_mi_segments_collect(collect == MI_FORCE, &heap->tld->segments);
// if forced, collect thread data cache on program-exit (or shared library unload)
if (force && is_main_thread && mi_heap_is_backing(heap)) {
_mi_thread_data_collect(); // collect thread data cache
}
// collect arenas (this is program wide so don't force purges on abandonment of threads)
_mi_arenas_collect(collect == MI_FORCE /* force purge? */);
// collect arenas (this is program wide so don't force purges on abandonment of threads)
//mi_atomic_storei64_release(&heap->tld->subproc->purge_expire, 1);
_mi_arenas_collect(collect == MI_FORCE /* force purge? */, true /* visit all? */, heap->tld);
}
void _mi_heap_collect_abandon(mi_heap_t* heap) {
@ -187,8 +144,12 @@ void mi_collect(bool force) mi_attr_noexcept {
----------------------------------------------------------- */
mi_heap_t* mi_heap_get_default(void) {
mi_thread_init();
return mi_prim_get_default_heap();
mi_heap_t* heap = mi_prim_get_default_heap();
if mi_unlikely(!mi_heap_is_initialized(heap)) {
mi_thread_init();
heap = mi_prim_get_default_heap();
}
return heap;
}
static bool mi_heap_is_default(const mi_heap_t* heap) {
@ -201,39 +162,77 @@ mi_heap_t* mi_heap_get_backing(void) {
mi_assert_internal(heap!=NULL);
mi_heap_t* bheap = heap->tld->heap_backing;
mi_assert_internal(bheap!=NULL);
mi_assert_internal(bheap->thread_id == _mi_thread_id());
mi_assert_internal(bheap->tld->thread_id == _mi_thread_id());
return bheap;
}
void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool noreclaim, uint8_t tag) {
// todo: make order of parameters consistent (but would that break compat with CPython?)
void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint8_t heap_tag, mi_tld_t* tld)
{
mi_assert_internal(heap!=NULL);
mi_memid_t memid = heap->memid;
_mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t));
heap->tld = tld;
heap->thread_id = _mi_thread_id();
heap->arena_id = arena_id;
heap->no_reclaim = noreclaim;
heap->tag = tag;
if (heap == tld->heap_backing) {
heap->memid = memid;
heap->tld = tld; // avoid reading the thread-local tld during initialization
heap->exclusive_arena = _mi_arena_from_id(arena_id);
heap->allow_page_reclaim = !noreclaim;
heap->allow_page_abandon = (!noreclaim && mi_option_get(mi_option_page_full_retain) >= 0);
heap->full_page_retain = mi_option_get_clamp(mi_option_page_full_retain, -1, 32);
heap->tag = heap_tag;
if (heap->tld->is_in_threadpool) {
// if we run as part of a thread pool it is better to not arbitrarily reclaim abandoned pages into our heap.
// (but abandoning is good in this case)
heap->allow_page_reclaim = false;
// and reduce the full page retain to a quarter (possibly to 0)
if (heap->full_page_retain >= 0) {
heap->full_page_retain = heap->full_page_retain / 4;
}
}
if (heap->tld->heap_backing == NULL) {
heap->tld->heap_backing = heap; // first heap becomes the backing heap
_mi_random_init(&heap->random);
}
else {
_mi_random_split(&tld->heap_backing->random, &heap->random);
_mi_random_split(&heap->tld->heap_backing->random, &heap->random);
}
heap->cookie = _mi_heap_random_next(heap) | 1;
heap->keys[0] = _mi_heap_random_next(heap);
heap->keys[1] = _mi_heap_random_next(heap);
//heap->keys[0] = _mi_heap_random_next(heap);
//heap->keys[1] = _mi_heap_random_next(heap);
_mi_heap_guarded_init(heap);
// push on the thread local heaps list
heap->next = heap->tld->heaps;
heap->tld->heaps = heap;
}
mi_heap_t* _mi_heap_create(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id, mi_tld_t* tld) {
mi_assert_internal(tld!=NULL);
mi_assert(heap_tag >= 0 && heap_tag < 256);
// allocate and initialize a heap
mi_memid_t memid;
mi_heap_t* heap;
if (arena_id == _mi_arena_id_none()) {
heap = (mi_heap_t*)_mi_meta_zalloc(sizeof(mi_heap_t), &memid);
}
else {
// heaps associated with a specific arena are allocated in that arena
// note: takes up at least one slice which is quite wasteful...
heap = (mi_heap_t*)_mi_arenas_alloc(_mi_subproc(), _mi_align_up(sizeof(mi_heap_t),MI_ARENA_MIN_OBJ_SIZE), true, true, _mi_arena_from_id(arena_id), tld->thread_seq, &memid);
}
if (heap==NULL) {
_mi_error_message(ENOMEM, "unable to allocate heap meta-data\n");
return NULL;
}
heap->memid = memid;
_mi_heap_init(heap, arena_id, allow_destroy, (uint8_t)heap_tag, tld);
return heap;
}
mi_decl_nodiscard mi_heap_t* mi_heap_new_ex(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id) {
mi_heap_t* bheap = mi_heap_get_backing();
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
if (heap == NULL) return NULL;
mi_assert(heap_tag >= 0 && heap_tag < 256);
_mi_heap_init(heap, bheap->tld, arena_id, allow_destroy /* no reclaim? */, (uint8_t)heap_tag /* heap tag */);
return heap;
mi_assert_internal(bheap != NULL);
return _mi_heap_create(heap_tag, allow_destroy, arena_id, bheap->tld);
}
mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) {
@ -246,7 +245,7 @@ mi_decl_nodiscard mi_heap_t* mi_heap_new(void) {
}
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid) {
return _mi_arena_memid_is_suitable(memid, heap->arena_id);
return _mi_arena_memid_is_suitable(memid, heap->exclusive_arena);
}
uintptr_t _mi_heap_random_next(mi_heap_t* heap) {
@ -258,14 +257,14 @@ static void mi_heap_reset_pages(mi_heap_t* heap) {
mi_assert_internal(heap != NULL);
mi_assert_internal(mi_heap_is_initialized(heap));
// TODO: copy full empty heap instead?
memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
_mi_memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
_mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
heap->thread_delayed_free = NULL;
// heap->thread_delayed_free = NULL;
heap->page_count = 0;
}
// called from `mi_heap_destroy` and `mi_heap_delete` to free the internal heap resources.
static void mi_heap_free(mi_heap_t* heap) {
static void mi_heap_free(mi_heap_t* heap, bool do_free_mem) {
mi_assert(heap != NULL);
mi_assert_internal(mi_heap_is_initialized(heap));
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
@ -292,7 +291,9 @@ static void mi_heap_free(mi_heap_t* heap) {
mi_assert_internal(heap->tld->heaps != NULL);
// and free the used memory
mi_free(heap);
if (do_free_mem) {
_mi_meta_free(heap, sizeof(*heap), heap->memid);
}
}
// return a heap on the same thread as `heap` specialized for the specified tag (if it exists)
@ -319,24 +320,24 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
MI_UNUSED(pq);
// ensure no more thread_delayed_free will be added
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
//_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
// stats
const size_t bsize = mi_page_block_size(page);
if (bsize > MI_LARGE_OBJ_SIZE_MAX) {
if (bsize > MI_LARGE_MAX_OBJ_SIZE) {
mi_heap_stat_decrease(heap, huge, bsize);
}
#if (MI_STAT)
#if (MI_STAT)
_mi_page_free_collect(page, false); // update used count
const size_t inuse = page->used;
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
if (bsize <= MI_LARGE_MAX_OBJ_SIZE) {
mi_heap_stat_decrease(heap, normal, bsize * inuse);
#if (MI_STAT>1)
#if (MI_STAT>1)
mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], inuse);
#endif
#endif
}
mi_heap_stat_decrease(heap, malloc, bsize * inuse); // todo: off for aligned blocks...
#endif
#endif
/// pretend it is all free now
mi_assert_internal(mi_page_thread_free(page) == NULL);
@ -346,7 +347,8 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
// mi_page_free(page,false);
page->next = NULL;
page->prev = NULL;
_mi_segment_page_free(page,false /* no force? */, &heap->tld->segments);
mi_page_set_heap(page, NULL);
_mi_arenas_page_free(page);
return true; // keep going
}
@ -367,7 +369,8 @@ static bool mi_cdecl mi_heap_track_block_free(const mi_heap_t* heap, const mi_he
void mi_heap_destroy(mi_heap_t* heap) {
mi_assert(heap != NULL);
mi_assert(mi_heap_is_initialized(heap));
mi_assert(heap->no_reclaim);
mi_assert(!heap->allow_page_reclaim);
mi_assert(!heap->allow_page_abandon);
mi_assert_expensive(mi_heap_is_valid(heap));
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
#if MI_GUARDED
@ -375,9 +378,9 @@ void mi_heap_destroy(mi_heap_t* heap) {
mi_heap_delete(heap);
return;
#else
if (!heap->no_reclaim) {
if (heap->allow_page_reclaim) {
_mi_warning_message("'mi_heap_destroy' called but ignored as the heap was not created with 'allow_destroy' (heap at %p)\n", heap);
// don't free in case it may contain reclaimed pages
// don't free in case it may contain reclaimed pages,
mi_heap_delete(heap);
}
else {
@ -387,7 +390,7 @@ void mi_heap_destroy(mi_heap_t* heap) {
#endif
// free all pages
_mi_heap_destroy_pages(heap);
mi_heap_free(heap);
mi_heap_free(heap,true);
}
#endif
}
@ -399,7 +402,7 @@ void _mi_heap_unsafe_destroy_all(mi_heap_t* heap) {
mi_heap_t* curr = heap->tld->heaps;
while (curr != NULL) {
mi_heap_t* next = curr->next;
if (curr->no_reclaim) {
if (!curr->allow_page_reclaim) {
mi_heap_destroy(curr);
}
else {
@ -414,44 +417,30 @@ void _mi_heap_unsafe_destroy_all(mi_heap_t* heap) {
----------------------------------------------------------- */
// Transfer the pages from one heap to the other
static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
mi_assert_internal(heap!=NULL);
if (from==NULL || from->page_count == 0) return;
//static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
// mi_assert_internal(heap!=NULL);
// if (from==NULL || from->page_count == 0) return;
//
// // transfer all pages by appending the queues; this will set a new heap field
// for (size_t i = 0; i <= MI_BIN_FULL; i++) {
// mi_page_queue_t* pq = &heap->pages[i];
// mi_page_queue_t* append = &from->pages[i];
// size_t pcount = _mi_page_queue_append(heap, pq, append);
// heap->page_count += pcount;
// from->page_count -= pcount;
// }
// mi_assert_internal(from->page_count == 0);
//
// // and reset the `from` heap
// mi_heap_reset_pages(from);
//}
// reduce the size of the delayed frees
_mi_heap_delayed_free_partial(from);
// transfer all pages by appending the queues; this will set a new heap field
// so threads may do delayed frees in either heap for a while.
// note: appending waits for each page to not be in the `MI_DELAYED_FREEING` state
// so after this only the new heap will get delayed frees
for (size_t i = 0; i <= MI_BIN_FULL; i++) {
mi_page_queue_t* pq = &heap->pages[i];
mi_page_queue_t* append = &from->pages[i];
size_t pcount = _mi_page_queue_append(heap, pq, append);
heap->page_count += pcount;
from->page_count -= pcount;
}
mi_assert_internal(from->page_count == 0);
// and do outstanding delayed frees in the `from` heap
// note: be careful here as the `heap` field in all those pages no longer point to `from`,
// turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a
// the regular `_mi_free_delayed_block` which is safe.
_mi_heap_delayed_free_all(from);
#if !defined(_MSC_VER) || (_MSC_VER > 1900) // somehow the following line gives an error in VS2015, issue #353
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL);
#endif
// and reset the `from` heap
mi_heap_reset_pages(from);
}
// are two heaps compatible with respect to heap-tag, exclusive arena etc.
static bool mi_heaps_are_compatible(mi_heap_t* heap1, mi_heap_t* heap2) {
return (heap1->tag == heap2->tag && // store same kind of objects
heap1->arena_id == heap2->arena_id); // same arena preference
}
//// are two heaps compatible with respect to heap-tag, exclusive arena etc.
//static bool mi_heaps_are_compatible(mi_heap_t* heap1, mi_heap_t* heap2) {
// return (heap1->tag == heap2->tag && // store same kind of objects
// heap1->tld->subproc == heap2->tld->subproc && // same sub-process
// heap1->arena_id == heap2->arena_id); // same arena preference
//}
// Safe delete a heap without freeing any still allocated blocks in that heap.
void mi_heap_delete(mi_heap_t* heap)
@ -461,17 +450,11 @@ void mi_heap_delete(mi_heap_t* heap)
mi_assert_expensive(mi_heap_is_valid(heap));
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
mi_heap_t* bheap = heap->tld->heap_backing;
if (bheap != heap && mi_heaps_are_compatible(bheap,heap)) {
// transfer still used pages to the backing heap
mi_heap_absorb(bheap, heap);
}
else {
// the backing heap abandons its pages
_mi_heap_collect_abandon(heap);
}
// abandon all pages
_mi_heap_collect_abandon(heap);
mi_assert_internal(heap->page_count==0);
mi_heap_free(heap);
mi_heap_free(heap,true);
}
mi_heap_t* mi_heap_set_default(mi_heap_t* heap) {
@ -485,7 +468,63 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) {
}
/* -----------------------------------------------------------
Load/unload heaps
----------------------------------------------------------- */
void mi_heap_unload(mi_heap_t* heap) {
mi_assert(mi_heap_is_initialized(heap));
mi_assert_expensive(mi_heap_is_valid(heap));
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
if (heap->exclusive_arena == NULL) {
_mi_warning_message("cannot unload heaps that are not associated with an exclusive arena\n");
return;
}
// abandon all pages so all thread ids in the pages are cleared
_mi_heap_collect_abandon(heap);
mi_assert_internal(heap->page_count==0);
// remove from heap list
mi_heap_free(heap, false /* but don't actually free the memory */);
// disassociate from the current thread-local and static state
heap->tld = NULL;
return;
}
bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena_id) {
mi_assert(mi_heap_is_initialized(heap));
if (heap==NULL || !mi_heap_is_initialized(heap)) return false;
if (heap->exclusive_arena == NULL) {
_mi_warning_message("cannot reload heaps that were not associated with an exclusive arena\n");
return false;
}
if (heap->tld != NULL) {
_mi_warning_message("cannot reload heaps that were not unloaded first\n");
return false;
}
mi_arena_t* arena = _mi_arena_from_id(arena_id);
if (heap->exclusive_arena != arena) {
_mi_warning_message("trying to reload a heap at a different arena address: %p vs %p\n", heap->exclusive_arena, arena);
return false;
}
mi_assert_internal(heap->page_count==0);
// re-associate with the current thread-local and static state
heap->tld = mi_heap_get_default()->tld;
// reinit direct pages (as we may be in a different process)
mi_assert_internal(heap->page_count == 0);
for (size_t i = 0; i < MI_PAGES_DIRECT; i++) {
heap->pages_free_direct[i] = (mi_page_t*)&_mi_page_empty;
}
// push on the thread local heaps list
heap->next = heap->tld->heaps;
heap->tld->heaps = heap;
return true;
}
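Together, `mi_heap_unload` and `mi_heap_reload` support detaching a heap that lives entirely in an exclusive arena and re-attaching it later, possibly from a different thread (or process, if the arena memory is mapped at the same address). A hedged usage sketch, assuming the arena is reserved with `mi_reserve_os_memory_ex` and with error handling kept minimal:
#include <mimalloc.h>
static void unload_reload_example(void) {   // hypothetical helper
  mi_arena_id_t arena_id;
  if (mi_reserve_os_memory_ex(64 * 1024 * 1024, true /* commit */, false /* allow_large */,
                              true /* exclusive */, &arena_id) != 0) return;
  mi_heap_t* heap = mi_heap_new_in_arena(arena_id);  // heap whose pages come only from that arena
  if (heap == NULL) return;
  void* p = mi_heap_malloc(heap, 128);
  mi_free(p);
  mi_heap_unload(heap);   // abandons all pages and detaches the heap from this thread
  // ... later, possibly from another thread that sees the same arena at the same address:
  if (mi_heap_reload(heap, arena_id)) {
    void* q = mi_heap_malloc(heap, 256);
    mi_free(q);
    mi_heap_delete(heap);
  }
}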
/* -----------------------------------------------------------
Analysis
@ -494,11 +533,8 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) {
// static since it is not thread safe to access heaps from other threads.
static mi_heap_t* mi_heap_of_block(const void* p) {
if (p == NULL) return NULL;
mi_segment_t* segment = _mi_ptr_segment(p);
bool valid = (_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(valid);
if mi_unlikely(!valid) return NULL;
return mi_page_heap(_mi_segment_page_of(segment,p));
mi_page_t* page = _mi_ptr_page(p); // TODO: check pointer validity?
return mi_page_heap(page);
}
bool mi_heap_contains_block(mi_heap_t* heap, const void* p) {
@ -573,7 +609,7 @@ bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_
if (page->used == 0) return true;
size_t psize;
uint8_t* const pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
uint8_t* const pstart = mi_page_area(page, &psize);
mi_heap_t* const heap = mi_page_heap(page);
const size_t bsize = mi_page_block_size(page);
const size_t ubsize = mi_page_usable_block_size(page); // without padding

View file

@ -11,32 +11,31 @@ terms of the MIT license. A copy of the license can be found in the file
#include <string.h> // memcpy, memset
#include <stdlib.h> // atexit
#define MI_MEMID_INIT(kind) {{{NULL,0}}, kind, true /* pinned */, true /* committed */, false /* zero */ }
#define MI_MEMID_STATIC MI_MEMID_INIT(MI_MEM_STATIC)
// Empty page used to initialize the small free pages array
const mi_page_t _mi_page_empty = {
0,
false, false, false, false,
0, // capacity
0, // reserved capacity
{ 0 }, // flags
false, // is_zero
0, // retire_expire
NULL, // free
NULL, // local_free
0, // used
0, // block size shift
0, // heap tag
0, // block_size
NULL, // page_start
MI_ATOMIC_VAR_INIT(MI_PAGE_IN_FULL_QUEUE), // xthread_id (must set flag to catch NULL on a free)
NULL, // free
0, // used
0, // capacity
0, // reserved capacity
0, // block size shift
0, // retire_expire
NULL, // local_free
MI_ATOMIC_VAR_INIT(0), // xthread_free
0, // block_size
NULL, // page_start
0, // heap tag
false, // is_zero
#if (MI_PADDING || MI_ENCODE_FREELIST)
{ 0, 0 },
#endif
MI_ATOMIC_VAR_INIT(0), // xthread_free
MI_ATOMIC_VAR_INIT(0), // xheap
NULL, NULL
#if MI_INTPTR_SIZE==4
, { NULL }
{ 0, 0 }, // keys
#endif
NULL, // xheap
NULL, NULL, // next, prev
MI_ARENA_SLICE_SIZE, // page_committed
MI_MEMID_STATIC // memid
};
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
@ -63,8 +62,8 @@ const mi_page_t _mi_page_empty = {
QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \
QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \
QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \
QNULL(MI_LARGE_OBJ_WSIZE_MAX + 1 /* 655360, Huge queue */), \
QNULL(MI_LARGE_OBJ_WSIZE_MAX + 2) /* Full queue */ }
QNULL(MI_LARGE_MAX_OBJ_WSIZE + 1 /* 655360, Huge queue */), \
QNULL(MI_LARGE_MAX_OBJ_WSIZE + 2) /* Full queue */ }
#define MI_STAT_COUNT_NULL() {0,0,0,0}
@ -82,12 +81,10 @@ const mi_page_t _mi_page_empty = {
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), \
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
{ 0, 0 } \
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \
MI_STAT_COUNT_END_NULL()
// --------------------------------------------------------
@ -99,24 +96,83 @@ const mi_page_t _mi_page_empty = {
// may lead to allocation itself on some platforms)
// --------------------------------------------------------
static mi_decl_cache_align mi_subproc_t subproc_main
#if __cplusplus
= { }; // empty initializer to prevent running the constructor (with msvc)
#else
= { 0 }; // C zero initialize
#endif
static mi_decl_cache_align mi_tld_t tld_empty = {
0, // thread_id
0, // thread_seq
&subproc_main, // subproc
NULL, // heap_backing
NULL, // heaps list
0, // heartbeat
false, // recurse
false, // is_in_threadpool
{ MI_STATS_NULL }, // stats
MI_MEMID_STATIC // memid
};
mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
NULL,
MI_ATOMIC_VAR_INIT(NULL),
0, // tid
0, // cookie
0, // arena id
{ 0, 0 }, // keys
{ {0}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
NULL, // next
false, // can reclaim
0, // tag
&tld_empty, // tld
NULL, // exclusive_arena
0, // cookie
//{ 0, 0 }, // keys
{ {0}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
0, // generic count
NULL, // next
0, // full page retain
false, // can reclaim
true, // can eager abandon
0, // tag
#if MI_GUARDED
0, 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`)
0, 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`)
#endif
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY
MI_PAGE_QUEUES_EMPTY,
MI_MEMID_STATIC
};
extern mi_heap_t heap_main;
static mi_decl_cache_align mi_tld_t tld_main = {
0, // thread_id
0, // thread_seq
&subproc_main, // subproc
&heap_main, // heap_backing
&heap_main, // heaps list
0, // heartbeat
false, // recurse
false, // is_in_threadpool
{ MI_STATS_NULL }, // stats
MI_MEMID_STATIC // memid
};
mi_decl_cache_align mi_heap_t heap_main = {
&tld_main, // thread local data
NULL, // exclusive arena
0, // initial cookie
//{ 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0x846ca68b}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
0, // generic count
NULL, // next heap
2, // full page retain
true, // allow page reclaim
true, // allow page abandon
0, // tag
#if MI_GUARDED
0, 0, 0, 0, 0,
#endif
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
MI_MEMID_STATIC
};
@ -127,39 +183,6 @@ mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
// the thread-local default heap for allocation
mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
extern mi_heap_t _mi_heap_main;
static mi_decl_cache_align mi_subproc_t mi_subproc_default;
static mi_decl_cache_align mi_tld_t tld_main = {
0, false,
&_mi_heap_main, &_mi_heap_main,
{ { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0},
0, 0, 0, 0, 0, &mi_subproc_default,
&tld_main.stats
}, // segments
{ MI_STATS_NULL } // stats
};
mi_decl_cache_align mi_heap_t _mi_heap_main = {
&tld_main,
MI_ATOMIC_VAR_INIT(NULL),
0, // thread id
0, // initial cookie
0, // arena id
{ 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0x846ca68b}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
NULL, // next heap
false, // can reclaim
0, // tag
#if MI_GUARDED
0, 0, 0, 0, 0,
#endif
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY
};
bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`.
@ -175,7 +198,7 @@ mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t samp
if (heap->guarded_sample_rate >= 1) {
heap->guarded_sample_seed = heap->guarded_sample_seed % heap->guarded_sample_rate;
}
heap->guarded_sample_count = heap->guarded_sample_seed; // count down samples
heap->guarded_sample_count = 1 + heap->guarded_sample_seed; // count down samples
}
mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max) {
@ -204,28 +227,132 @@ void _mi_heap_guarded_init(mi_heap_t* heap) {
}
#endif
static void mi_heap_main_init(void) {
if (_mi_heap_main.cookie == 0) {
_mi_heap_main.thread_id = _mi_thread_id();
_mi_heap_main.cookie = 1;
#if defined(_WIN32) && !defined(MI_SHARED_LIB)
_mi_random_init_weak(&_mi_heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking
#else
_mi_random_init(&_mi_heap_main.random);
#endif
_mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main);
mi_lock_init(&mi_subproc_default.abandoned_os_lock);
mi_lock_init(&mi_subproc_default.abandoned_os_visit_lock);
_mi_heap_guarded_init(&_mi_heap_main);
// Initialize main subproc
static void mi_subproc_main_init(void) {
if (subproc_main.memid.memkind != MI_MEM_STATIC) {
subproc_main.memid = _mi_memid_create(MI_MEM_STATIC);
mi_lock_init(&subproc_main.os_abandoned_pages_lock);
mi_lock_init(&subproc_main.arena_reserve_lock);
}
}
mi_heap_t* _mi_heap_main_get(void) {
// Initialize main tld
static void mi_tld_main_init(void) {
if (tld_main.thread_id == 0) {
tld_main.thread_id = _mi_prim_thread_id();
}
}
// Initialization of the (statically allocated) main heap, and the main tld and subproc.
static void mi_heap_main_init(void) {
if (heap_main.cookie == 0) {
mi_subproc_main_init();
mi_tld_main_init();
// heap
heap_main.cookie = 1;
#if defined(__APPLE__) || defined(_WIN32) && !defined(MI_SHARED_LIB)
_mi_random_init_weak(&heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking
#else
_mi_random_init(&heap_main.random);
#endif
heap_main.cookie = _mi_heap_random_next(&heap_main);
//heap_main.keys[0] = _mi_heap_random_next(&heap_main);
//heap_main.keys[1] = _mi_heap_random_next(&heap_main);
_mi_heap_guarded_init(&heap_main);
heap_main.allow_page_abandon = (mi_option_get(mi_option_page_full_retain) >= 0);
heap_main.full_page_retain = mi_option_get_clamp(mi_option_page_full_retain, -1, 32);
}
}
mi_heap_t* heap_main_get(void) {
mi_heap_main_init();
return &_mi_heap_main;
return &heap_main;
}
/* -----------------------------------------------------------
Thread local data
----------------------------------------------------------- */
// Count current and total created threads
static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1);
static _Atomic(size_t) thread_total_count;
size_t _mi_current_thread_count(void) {
return mi_atomic_load_relaxed(&thread_count);
}
// The mimalloc thread local data
mi_decl_thread mi_tld_t* thread_tld = &tld_empty;
// Allocate fresh tld
static mi_tld_t* mi_tld_alloc(void) {
mi_atomic_increment_relaxed(&thread_count);
if (_mi_is_main_thread()) {
return &tld_main;
}
else {
// allocate tld meta-data
// note: we need to be careful to not access the tld from `_mi_meta_zalloc`
// (and in turn from `_mi_arena_alloc_aligned` and `_mi_os_alloc_aligned`).
mi_memid_t memid;
mi_tld_t* tld = (mi_tld_t*)_mi_meta_zalloc(sizeof(mi_tld_t), &memid);
if (tld==NULL) {
_mi_error_message(ENOMEM, "unable to allocate memory for thread local data\n");
return NULL;
}
tld->memid = memid;
tld->heap_backing = NULL;
tld->heaps = NULL;
tld->subproc = &subproc_main;
tld->thread_id = _mi_prim_thread_id();
tld->thread_seq = mi_atomic_add_acq_rel(&thread_total_count, 1);
tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool();
return tld;
}
}
#define MI_TLD_INVALID ((mi_tld_t*)1)
mi_decl_noinline static void mi_tld_free(mi_tld_t* tld) {
if (tld != NULL && tld != MI_TLD_INVALID) {
_mi_stats_done(&tld->stats);
_mi_meta_free(tld, sizeof(mi_tld_t), tld->memid);
}
#if 0
// do not read/write to `thread_tld` on older macOS <= 14 as that will re-initialize the thread local storage
// (since we are calling this during pthread shutdown)
// (and this could happen on other systems as well, so let's never do it)
thread_tld = MI_TLD_INVALID;
#endif
mi_atomic_decrement_relaxed(&thread_count);
}
static mi_tld_t* mi_tld(void) {
mi_tld_t* tld = thread_tld;
if (tld == MI_TLD_INVALID) {
_mi_error_message(EFAULT, "internal error: tld is accessed after the thread terminated\n");
thread_tld = &tld_empty;
}
if (tld==&tld_empty) {
thread_tld = tld = mi_tld_alloc();
}
return tld;
}
mi_subproc_t* _mi_subproc(void) {
// should work without doing initialization (as it may be called from `_mi_tld -> mi_tld_alloc ... -> os_alloc -> _mi_subproc()`)
// todo: this will still fail on systems where the first access to a thread-local causes allocation.
// on such systems we can check for this with the _mi_prim_get_default_heap as those are protected (by being
// stored in a TLS slot for example)
mi_heap_t* heap = mi_prim_get_default_heap();
if (heap == NULL) {
return _mi_subproc_main();
}
else {
return heap->tld->subproc; // avoid using thread local storage (`thread_tld`)
}
}
@ -233,179 +360,99 @@ mi_heap_t* _mi_heap_main_get(void) {
Sub process
----------------------------------------------------------- */
mi_subproc_t* _mi_subproc_main(void) {
return &subproc_main;
}
mi_subproc_id_t mi_subproc_main(void) {
return NULL;
}
mi_subproc_id_t mi_subproc_new(void) {
mi_memid_t memid = _mi_memid_none();
mi_subproc_t* subproc = (mi_subproc_t*)_mi_arena_meta_zalloc(sizeof(mi_subproc_t), &memid);
mi_memid_t memid;
mi_subproc_t* subproc = (mi_subproc_t*)_mi_meta_zalloc(sizeof(mi_subproc_t),&memid);
if (subproc == NULL) return NULL;
subproc->memid = memid;
subproc->abandoned_os_list = NULL;
mi_lock_init(&subproc->abandoned_os_lock);
mi_lock_init(&subproc->abandoned_os_visit_lock);
mi_lock_init(&subproc->os_abandoned_pages_lock);
mi_lock_init(&subproc->arena_reserve_lock);
return subproc;
}
mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id) {
return (subproc_id == NULL ? &mi_subproc_default : (mi_subproc_t*)subproc_id);
return (subproc_id == NULL ? &subproc_main : (mi_subproc_t*)subproc_id);
}
void mi_subproc_delete(mi_subproc_id_t subproc_id) {
if (subproc_id == NULL) return;
mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id);
// check if there are no abandoned segments still..
// check if there are os pages still..
bool safe_to_delete = false;
mi_lock(&subproc->abandoned_os_lock) {
if (subproc->abandoned_os_list == NULL) {
mi_lock(&subproc->os_abandoned_pages_lock) {
if (subproc->os_abandoned_pages == NULL) {
safe_to_delete = true;
}
}
if (!safe_to_delete) return;
// merge stats back into the main subproc?
_mi_stats_merge_from(&_mi_subproc_main()->stats, &subproc->stats);
// safe to release
// todo: should we refcount subprocesses?
mi_lock_done(&subproc->abandoned_os_lock);
mi_lock_done(&subproc->abandoned_os_visit_lock);
_mi_arena_meta_free(subproc, subproc->memid, sizeof(mi_subproc_t));
mi_lock_done(&subproc->os_abandoned_pages_lock);
mi_lock_done(&subproc->arena_reserve_lock);
_mi_meta_free(subproc, sizeof(mi_subproc_t), subproc->memid);
}
void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) {
mi_heap_t* heap = mi_heap_get_default();
if (heap == NULL) return;
mi_assert(heap->tld->segments.subproc == &mi_subproc_default);
if (heap->tld->segments.subproc != &mi_subproc_default) return;
heap->tld->segments.subproc = _mi_subproc_from_id(subproc_id);
mi_tld_t* tld = mi_tld();
if (tld == NULL) return;
mi_assert(tld->subproc == &subproc_main);
if (tld->subproc != &subproc_main) return;
tld->subproc = _mi_subproc_from_id(subproc_id);
}
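The sub-process API isolates a group of threads so they do not share abandoned pages with threads outside the group. A hedged sketch of the intended call order; `worker` and `run_isolated` are made-up names and thread creation is elided:
#include <mimalloc.h>
static void worker(void* arg) {
  mi_subproc_id_t subproc = (mi_subproc_id_t)arg;
  // must run early in the thread, while it is still attached to the main sub-process
  mi_subproc_add_current_thread(subproc);
  void* p = mi_malloc(512);
  mi_free(p);
}
static void run_isolated(void) {
  mi_subproc_id_t subproc = mi_subproc_new();
  // ... start worker threads with `subproc` as the argument and join them ...
  mi_subproc_delete(subproc);   // has no effect while abandoned OS pages remain (see `mi_subproc_delete` above)
}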
/* -----------------------------------------------------------
Initialization and freeing of the thread local heaps
Allocate heap data
----------------------------------------------------------- */
// note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size).
typedef struct mi_thread_data_s {
mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
mi_tld_t tld;
mi_memid_t memid; // must come last due to zero'ing
} mi_thread_data_t;
// Thread meta-data is allocated directly from the OS. For
// some programs that do not use thread pools and allocate and
// destroy many OS threads, this may cause too much overhead
// per thread so we maintain a small cache of recently freed metadata.
#define TD_CACHE_SIZE (32)
static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE];
static mi_thread_data_t* mi_thread_data_zalloc(void) {
// try to find thread metadata in the cache
bool is_zero = false;
mi_thread_data_t* td = NULL;
for (int i = 0; i < TD_CACHE_SIZE; i++) {
td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
if (td != NULL) {
// found cached allocation, try use it
td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
if (td != NULL) {
break;
}
}
}
// if that fails, allocate as meta data
if (td == NULL) {
mi_memid_t memid;
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid);
if (td == NULL) {
// if this fails, try once more. (issue #257)
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid);
if (td == NULL) {
// really out of memory
_mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
}
}
if (td != NULL) {
td->memid = memid;
is_zero = memid.initially_zero;
}
}
if (td != NULL && !is_zero) {
_mi_memzero_aligned(td, offsetof(mi_thread_data_t,memid));
}
return td;
}
static void mi_thread_data_free( mi_thread_data_t* tdfree ) {
// try to add the thread metadata to the cache
for (int i = 0; i < TD_CACHE_SIZE; i++) {
mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
if (td == NULL) {
mi_thread_data_t* expected = NULL;
if (mi_atomic_cas_ptr_weak_acq_rel(mi_thread_data_t, &td_cache[i], &expected, tdfree)) {
return;
}
}
}
// if that fails, just free it directly
_mi_os_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid);
}
void _mi_thread_data_collect(void) {
// free all thread metadata from the cache
for (int i = 0; i < TD_CACHE_SIZE; i++) {
mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
if (td != NULL) {
td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
if (td != NULL) {
_mi_os_free(td, sizeof(mi_thread_data_t), td->memid);
}
}
}
}
// Initialize the thread local default heap, called from `mi_thread_init`
static bool _mi_thread_heap_init(void) {
if (mi_heap_is_initialized(mi_prim_get_default_heap())) return true;
if (_mi_is_main_thread()) {
// mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization
// mi_assert_internal(heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization
// the main heap is statically allocated
mi_heap_main_init();
_mi_heap_set_default_direct(&_mi_heap_main);
_mi_heap_set_default_direct(&heap_main);
//mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap());
}
else {
// use `_mi_os_alloc` to allocate directly from the OS
mi_thread_data_t* td = mi_thread_data_zalloc();
if (td == NULL) return false;
// allocates tld data
// note: we cannot access thread-locals yet as that can cause (recursive) allocation
// (on macOS <= 14 for example where the loader allocates thread-local data on demand).
mi_tld_t* tld = mi_tld_alloc();
mi_tld_t* tld = &td->tld;
mi_heap_t* heap = &td->heap;
_mi_tld_init(tld, heap); // must be before `_mi_heap_init`
_mi_heap_init(heap, tld, _mi_arena_id_none(), false /* can reclaim */, 0 /* default tag */);
// allocate and initialize the heap
mi_heap_t* heap = _mi_heap_create(0 /* default tag */, false /* allow destroy? */, _mi_arena_id_none(), tld);
// associate the heap with this thread
// (this is safe; on macOS, for example, the heap is set in a dedicated TLS slot and thus does not cause recursive allocation)
_mi_heap_set_default_direct(heap);
// now that the heap is set for this thread, we can set the thread-local tld.
thread_tld = tld;
}
return false;
}
// initialize thread local data
void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
_mi_memzero_aligned(tld,sizeof(mi_tld_t));
tld->heap_backing = bheap;
tld->heaps = NULL;
tld->segments.subproc = &mi_subproc_default;
tld->segments.stats = &tld->stats;
}
// Free the thread local default heap (called from `mi_thread_done`)
static bool _mi_thread_heap_done(mi_heap_t* heap) {
if (!mi_heap_is_initialized(heap)) return true;
// reset default heap
_mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty);
_mi_heap_set_default_direct(_mi_is_main_thread() ? &heap_main : (mi_heap_t*)&_mi_heap_empty);
// switch to backing heap
heap = heap->tld->heap_backing;
@ -425,26 +472,22 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) {
mi_assert_internal(mi_heap_is_backing(heap));
// collect if not the main thread
if (heap != &_mi_heap_main) {
if (heap != &heap_main) {
_mi_heap_collect_abandon(heap);
}
// merge stats
_mi_stats_done(&heap->tld->stats);
// free heap meta data
_mi_meta_free(heap, sizeof(mi_heap_t), heap->memid);
// free if not the main thread
if (heap != &_mi_heap_main) {
mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id());
mi_thread_data_free((mi_thread_data_t*)heap);
}
else {
if (heap == &heap_main) {
#if 0
// never free the main thread even in debug mode; if a dll is linked statically with mimalloc,
// there may still be delete/free calls after the mi_fls_done is called. Issue #207
_mi_heap_destroy_pages(heap);
mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main);
mi_assert_internal(heap->tld->heap_backing == &heap_main);
#endif
}
return false;
}
@ -458,7 +501,7 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) {
// 1. windows dynamic library:
// call from DllMain on DLL_THREAD_DETACH
// 2. windows static library:
// use `FlsAlloc` to call a destructor when the thread is done
// use special linker section to call a destructor when the thread is done
// 3. unix, pthreads:
// use a pthread key to call a destructor when a pthread is done
//
@ -472,19 +515,14 @@ static void mi_process_setup_auto_thread_done(void) {
if (tls_initialized) return;
tls_initialized = true;
_mi_prim_thread_init_auto_done();
_mi_heap_set_default_direct(&_mi_heap_main);
_mi_heap_set_default_direct(&heap_main);
}
bool _mi_is_main_thread(void) {
return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id());
return (tld_main.thread_id==0 || tld_main.thread_id == _mi_thread_id());
}
static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1);
size_t _mi_current_thread_count(void) {
return mi_atomic_load_relaxed(&thread_count);
}
// This is called from the `mi_malloc_generic`
void mi_thread_init(void) mi_attr_noexcept
@ -497,8 +535,7 @@ void mi_thread_init(void) mi_attr_noexcept
// fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called)
if (_mi_thread_heap_init()) return; // returns true if already initialized
_mi_stat_increase(&_mi_stats_main.threads, 1);
mi_atomic_increment_relaxed(&thread_count);
mi_subproc_stat_increase(_mi_subproc_main(), threads, 1);
//_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
}
@ -520,14 +557,18 @@ void _mi_thread_done(mi_heap_t* heap)
}
// adjust stats
mi_atomic_decrement_relaxed(&thread_count);
_mi_stat_decrease(&_mi_stats_main.threads, 1);
mi_subproc_stat_decrease(_mi_subproc_main(), threads, 1);
// check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps...
if (heap->thread_id != _mi_thread_id()) return;
if (heap->tld->thread_id != _mi_prim_thread_id()) return;
// abandon the thread local heap
if (_mi_thread_heap_done(heap)) return; // returns true if already ran
// note: we store the tld as we should avoid reading `thread_tld` at this point (to avoid reinitializing the thread local storage)
mi_tld_t* tld = heap->tld;
_mi_thread_heap_done(heap); // returns true if already ran
// free thread local data
mi_tld_free(tld);
}
void _mi_heap_set_default_direct(mi_heap_t* heap) {
@ -580,7 +621,7 @@ void _mi_process_load(void) {
}
// reseed random
_mi_random_reinit_if_weak(&_mi_heap_main.random);
_mi_random_reinit_if_weak(&heap_main.random);
}
#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
@ -607,7 +648,7 @@ void mi_process_init(void) mi_attr_noexcept {
// ensure we are called once
static mi_atomic_once_t process_init;
#if _MSC_VER < 1920
mi_heap_main_init(); // vs2017 can dynamically re-initialize _mi_heap_main
mi_heap_main_init(); // vs2017 can dynamically re-initialize heap_main
#endif
if (!mi_atomic_once(&process_init)) return;
_mi_process_is_initialized = true;
@ -615,8 +656,11 @@ void mi_process_init(void) mi_attr_noexcept {
mi_process_setup_auto_thread_done();
mi_detect_cpu_features();
_mi_os_init();
mi_subproc_main_init();
mi_tld_main_init();
mi_heap_main_init();
_mi_os_init();
_mi_page_map_init();
#if MI_DEBUG
_mi_verbose_message("debug level : %d\n", MI_DEBUG);
#endif
@ -627,7 +671,7 @@ void mi_process_init(void) mi_attr_noexcept {
#endif
mi_thread_init();
#if defined(_WIN32)
#if defined(_WIN32) && defined(MI_WIN_USE_FLS)
// On Windows, when building as a static lib, the FLS cleanup happens too early for the main thread.
// To avoid this, set the FLS value for the main thread to NULL so the fls cleanup
// will not call _mi_thread_done on the (still executing) main thread. See issue #508.
@ -686,15 +730,14 @@ void mi_cdecl _mi_process_done(void) {
if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
mi_heap_collect(heap, true /* force */);
_mi_heap_unsafe_destroy_all(heap); // forcefully release all memory held by all heaps (of this thread only!)
_mi_arena_unsafe_destroy_all();
_mi_segment_map_unsafe_destroy();
_mi_arenas_unsafe_destroy_all(heap->tld);
}
if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) {
mi_stats_print(NULL);
}
_mi_allocator_done();
_mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id);
_mi_verbose_message("process done: 0x%zx\n", tld_main.thread_id);
os_preloading = true; // don't call the C runtime anymore
}

View file

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -7,7 +7,7 @@ terms of the MIT license. A copy of the license can be found in the file
// --------------------------------------------------------
// This module defines various std libc functions to reduce
// the dependency on libc, and also prevent errors caused
// the dependency on libc, and also prevent errors caused
// by some libc implementations when called before `main`
// executes (due to malloc redirection)
// --------------------------------------------------------
@ -83,9 +83,9 @@ bool _mi_getenv(const char* name, char* result, size_t result_size) {
// Define our own limited `_mi_vsnprintf` and `_mi_snprintf`
// This is mostly to avoid calling these when libc is not yet
// initialized (and to reduce dependencies)
//
// format: d i, p x u, s
// prec: z l ll L
//
// format: d i, p, x, u, s
// type: z l ll L
// width: 10
// align-left: -
// fill: 0
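For reference, a small sketch of what this limited formatter accepts, using only the specifiers listed above (`example_log` is a hypothetical helper for illustration; `_mi_snprintf` is defined further down in this file and `_mi_output_message` is added in options.c later in this diff):
static void example_log(void) {
  char   buf[128];
  void*  p     = NULL;
  size_t size  = 4096;
  long   count = 42;
  _mi_snprintf(buf, sizeof(buf), "block %p: %zu bytes (0x%zx), used %10ld\n", p, size, size, count);
  _mi_output_message("%s", buf);
}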
@ -130,7 +130,7 @@ static void mi_out_alignright(char fill, char* start, size_t len, size_t extra,
}
static void mi_out_num(uintmax_t x, size_t base, char prefix, char** out, char* end)
static void mi_out_num(uintmax_t x, size_t base, char prefix, char** out, char* end)
{
if (x == 0 || base == 0 || base > 16) {
if (prefix != 0) { mi_outc(prefix, out, end); }
@ -144,8 +144,8 @@ static void mi_out_num(uintmax_t x, size_t base, char prefix, char** out, char*
mi_outc((digit <= 9 ? '0' + digit : 'A' + digit - 10),out,end);
x = x / base;
}
if (prefix != 0) {
mi_outc(prefix, out, end);
if (prefix != 0) {
mi_outc(prefix, out, end);
}
size_t len = *out - start;
// and reverse in-place
@ -171,7 +171,18 @@ void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) {
char c;
MI_NEXTC();
if (c != '%') {
if ((c >= ' ' && c <= '~') || c=='\n' || c=='\r' || c=='\t') { // output visible ascii or standard control only
if (c == '\\') {
MI_NEXTC();
switch (c) {
case 'e': mi_outc('\x1B', &out, end); break;
case 't': mi_outc('\t', &out, end); break;
case 'n': mi_outc('\n', &out, end); break;
case 'r': mi_outc('\r', &out, end); break;
case '\\': mi_outc('\\', &out, end); break;
default: /* ignore */ break;
}
}
else if ((c >= ' ' && c <= '~') || c=='\n' || c=='\r' || c=='\t' || c=='\x1b') { // output visible ascii or standard control only
mi_outc(c, &out, end);
}
}
@ -181,7 +192,7 @@ void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) {
size_t width = 0;
char numtype = 'd';
char numplus = 0;
bool alignright = true;
bool alignright = true;
if (c == '+' || c == ' ') { numplus = c; MI_NEXTC(); }
if (c == '-') { alignright = false; MI_NEXTC(); }
if (c == '0') { fill = '0'; MI_NEXTC(); }
@ -191,7 +202,7 @@ void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) {
width = (10 * width) + (c - '0'); MI_NEXTC();
}
if (c == 0) break; // extra check due to while
}
}
if (c == 'z' || c == 't' || c == 'L') { numtype = c; MI_NEXTC(); }
else if (c == 'l') {
numtype = c; MI_NEXTC();
@ -199,7 +210,10 @@ void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) {
}
char* start = out;
if (c == 's') {
if (c == '%') {
mi_outc('%', &out, end);
}
else if (c == 's') {
// string
const char* s = va_arg(args, const char*);
mi_outs(s, &out, end);
@ -273,3 +287,127 @@ void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
_mi_vsnprintf(buf, buflen, fmt, args);
va_end(args);
}
// --------------------------------------------------------
// generic trailing and leading zero count, and popcount
// --------------------------------------------------------
#if !MI_HAS_FAST_BITSCAN
static size_t mi_ctz_generic32(uint32_t x) {
// de Bruijn multiplication, see <http://keithandkatie.com/keith/papers/debruijn.html>
static const uint8_t debruijn[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};
if (x==0) return 32;
return debruijn[(uint32_t)((x & -(int32_t)x) * (uint32_t)(0x077CB531U)) >> 27];
}
static size_t mi_clz_generic32(uint32_t x) {
// de Bruijn multiplication, see <http://keithandkatie.com/keith/papers/debruijn.html>
static const uint8_t debruijn[32] = {
31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
};
if (x==0) return 32;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
return debruijn[(uint32_t)(x * (uint32_t)(0x07C4ACDDU)) >> 27];
}
size_t _mi_ctz_generic(size_t x) {
if (x==0) return MI_SIZE_BITS;
#if (MI_SIZE_BITS <= 32)
return mi_ctz_generic32((uint32_t)x);
#else
const uint32_t lo = (uint32_t)x;
if (lo != 0) {
return mi_ctz_generic32(lo);
}
else {
return (32 + mi_ctz_generic32((uint32_t)(x>>32)));
}
#endif
}
size_t _mi_clz_generic(size_t x) {
if (x==0) return MI_SIZE_BITS;
#if (MI_SIZE_BITS <= 32)
return mi_clz_generic32((uint32_t)x);
#else
const uint32_t hi = (uint32_t)(x>>32);
if (hi != 0) {
return mi_clz_generic32(hi);
}
else {
return 32 + mi_clz_generic32((uint32_t)x);
}
#endif
}
#endif // bit scan
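The de Bruijn trick works because `x & -x` isolates the lowest set bit, say 2^k; multiplying the de Bruijn constant by 2^k is a left shift by k, and the top 5 bits of the product are distinct for every k, so they index a 32-entry table. A standalone sanity check (not part of mimalloc) that mirrors the code above and compares it against a naive loop:
#include <stdint.h>
#include <assert.h>
static unsigned ctz32_debruijn(uint32_t x) {
  static const uint8_t debruijn[32] = {
    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
  };
  if (x == 0) return 32;
  const uint32_t lowest = x & (0u - x);            // isolate the lowest set bit (2^k)
  return debruijn[(lowest * 0x077CB531u) >> 27];   // top 5 bits of the product identify k
}
static unsigned ctz32_naive(uint32_t x) {
  if (x == 0) return 32;
  unsigned n = 0;
  while ((x & 1) == 0) { x >>= 1; n++; }
  return n;
}
int main(void) {
  for (unsigned i = 0; i < 32; i++) {
    const uint32_t x = 1u << i;
    assert(ctz32_debruijn(x) == i);
    assert(ctz32_debruijn(x | 0x80000000u) == ctz32_naive(x | 0x80000000u));
  }
  return 0;
}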
#if !MI_HAS_FAST_POPCOUNT
#if MI_SIZE_SIZE == 4
#define mi_mask_even_bits32 (0x55555555)
#define mi_mask_even_pairs32 (0x33333333)
#define mi_mask_even_nibbles32 (0x0F0F0F0F)
// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
static size_t mi_byte_sum32(uint32_t x) {
// perform `x * 0x01010101`: the highest byte contains the sum of all bytes.
x += (x << 8);
x += (x << 16);
return (size_t)(x >> 24);
}
static size_t mi_popcount_generic32(uint32_t x) {
// first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10
// in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair
// into the lower bit-pair:
x = x - ((x >> 1) & mi_mask_even_bits32);
// add the 2-bit pair results
x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32);
// add the 4-bit nibble results
x = (x + (x >> 4)) & mi_mask_even_nibbles32;
// each byte now has a count of its bits, we can sum them now:
return mi_byte_sum32(x);
}
size_t _mi_popcount_generic(size_t x) {
return mi_popcount_generic32(x);
}
#else
#define mi_mask_even_bits64 (0x5555555555555555)
#define mi_mask_even_pairs64 (0x3333333333333333)
#define mi_mask_even_nibbles64 (0x0F0F0F0F0F0F0F0F)
// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
static size_t mi_byte_sum64(uint64_t x) {
x += (x << 8);
x += (x << 16);
x += (x << 32);
return (size_t)(x >> 56);
}
static size_t mi_popcount_generic64(uint64_t x) {
x = x - ((x >> 1) & mi_mask_even_bits64);
x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64);
x = (x + (x >> 4)) & mi_mask_even_nibbles64;
return mi_byte_sum64(x);
}
size_t _mi_popcount_generic(size_t x) {
return mi_popcount_generic64(x);
}
#endif
#endif // popcount
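The popcount above is a classic SWAR ("SIMD within a register") reduction: each step widens the per-group bit counts from 2-bit pairs to nibbles to bytes, and the final step sums the four byte counts. A standalone check (not part of mimalloc) using the multiply form of the byte sum, which is equivalent to the shift-and-add `mi_byte_sum32` above:
#include <stdint.h>
#include <assert.h>
static unsigned popcount32_swar(uint32_t x) {
  x = x - ((x >> 1) & 0x55555555u);                  // per 2-bit pair: number of set bits (0..2)
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // per nibble: number of set bits (0..4)
  x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // per byte: number of set bits (0..8)
  return (x * 0x01010101u) >> 24;                    // highest byte of the product holds the total
}
static unsigned popcount32_naive(uint32_t x) {
  unsigned n = 0;
  while (x != 0) { n += (x & 1u); x >>= 1; }
  return n;
}
int main(void) {
  const uint32_t samples[] = { 0u, 1u, 0x80000000u, 0xF0F00F0Fu, 0x077CB531u, 0xFFFFFFFFu };
  for (unsigned i = 0; i < sizeof(samples)/sizeof(samples[0]); i++) {
    assert(popcount32_swar(samples[i]) == popcount32_naive(samples[i]));
  }
  return 0;
}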

View file

@ -102,6 +102,14 @@ typedef struct mi_option_desc_s {
#endif
#endif
#ifndef MI_DEFAULT_PAGEMAP_COMMIT
#if defined(__APPLE__) // when overloading malloc, we still get mixed pointers sometimes on macOS; this avoids a bad access
#define MI_DEFAULT_PAGEMAP_COMMIT 1
#else
#define MI_DEFAULT_PAGEMAP_COMMIT 0
#endif
#endif
static mi_option_desc_t options[_mi_option_last] =
{
@ -136,7 +144,7 @@ static mi_option_desc_t options[_mi_option_last] =
#else
{ 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
#endif
{ 10, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds
{ 2500,UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
{ 0, UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
@ -145,9 +153,8 @@ static mi_option_desc_t options[_mi_option_last] =
{ 10, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. percentage of the abandoned segments to be reclaimed per try.
{ 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees!
{ MI_DEFAULT_ARENA_RESERVE, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time (=1GiB) (use `option_get_size`)
{ 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's
{ 1, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's
{ 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
{ 0, UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free
{ MI_DEFAULT_DISALLOW_ARENA_ALLOC, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's)
{ 400, UNINIT, MI_OPTION(retry_on_oom) }, // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries.
#if defined(MI_VISIT_ABANDONED)
@ -162,6 +169,13 @@ static mi_option_desc_t options[_mi_option_last] =
UNINIT, MI_OPTION(guarded_sample_rate)}, // 1 out of N allocations in the min/max range will be guarded (=4000)
{ 0, UNINIT, MI_OPTION(guarded_sample_seed)},
{ 0, UNINIT, MI_OPTION(target_segments_per_thread) }, // abandon segments beyond this point, or 0 to disable.
{ 1, UNINIT, MI_OPTION_LEGACY(reclaim_on_free, abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free
{ 2, UNINIT, MI_OPTION(page_full_retain) },
{ 4, UNINIT, MI_OPTION(page_max_candidates) },
{ 0, UNINIT, MI_OPTION(max_vabits) },
{ MI_DEFAULT_PAGEMAP_COMMIT,
UNINIT, MI_OPTION(pagemap_commit) }, // commit the full pagemap upfront?
{ 2, UNINIT, MI_OPTION(page_commit_on_demand) },
};
static void mi_option_init(mi_option_desc_t* desc);
@ -416,7 +430,7 @@ void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* me
// Define our own limited `fprintf` that avoids memory allocation.
// We do this using `_mi_vsnprintf` with a limited buffer.
static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) {
char buf[512];
char buf[992];
if (fmt==NULL) return;
if (!mi_recurse_enter()) return;
_mi_vsnprintf(buf, sizeof(buf)-1, fmt, args);
@ -442,6 +456,13 @@ static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix
}
}
void _mi_output_message(const char* fmt, ...) {
va_list args;
va_start(args, fmt);
mi_vfprintf(NULL, NULL, NULL, fmt, args);
va_end(args);
}
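The pattern behind mi_vfprintf and _mi_output_message above is worth spelling out: format into a fixed stack buffer with a bounded vsnprintf-style routine, then hand the string to an output callback, so the allocator can log without ever calling malloc. A standalone sketch (my names, plain libc vsnprintf standing in for `_mi_vsnprintf`):

#include <stdarg.h>
#include <stdio.h>

typedef void (output_fun)(const char* msg, void* arg);

static void log_fmt(output_fun* out, void* arg, const char* fmt, ...) {
  char buf[512];               // fixed stack buffer: long messages truncate, nothing is allocated
  va_list args;
  va_start(args, fmt);
  vsnprintf(buf, sizeof(buf), fmt, args);
  va_end(args);
  out(buf, arg);
}

static void to_stderr(const char* msg, void* arg) { (void)arg; fputs(msg, stderr); }

int main(void) {
  int dummy = 0;
  log_fmt(to_stderr, NULL, "reserved %zu KiB at %p\n", (size_t)1024, (void*)&dummy);
  return 0;
}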
void _mi_trace_message(const char* fmt, ...) {
if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher
va_list args;

249
src/os.c
View file

@ -9,21 +9,12 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"
#define mi_os_stat_increase(stat,amount) _mi_stat_increase(&_mi_stats_main.stat, amount)
#define mi_os_stat_decrease(stat,amount) _mi_stat_decrease(&_mi_stats_main.stat, amount)
#define mi_os_stat_counter_increase(stat,inc) _mi_stat_counter_increase(&_mi_stats_main.stat, inc)
// always use main stats for OS calls
#define os_stats (&_mi_stats_main)
/* -----------------------------------------------------------
Initialization.
----------------------------------------------------------- */
#ifndef MI_DEFAULT_VIRTUAL_ADDRESS_BITS
#if MI_INTPTR_SIZE < 8
#define MI_DEFAULT_VIRTUAL_ADDRESS_BITS 32
#else
#define MI_DEFAULT_VIRTUAL_ADDRESS_BITS 48
#endif
#endif
#ifndef MI_DEFAULT_PHYSICAL_MEMORY
#if MI_INTPTR_SIZE < 8
#define MI_DEFAULT_PHYSICAL_MEMORY 4*MI_GiB
@ -37,7 +28,7 @@ static mi_os_mem_config_t mi_os_mem_config = {
0, // large page size (usually 2MiB)
4096, // allocation granularity
MI_DEFAULT_PHYSICAL_MEMORY,
MI_DEFAULT_VIRTUAL_ADDRESS_BITS,
MI_MAX_VABITS, // in `bits.h`
true, // has overcommit? (if true we use MAP_NORESERVE on mmap systems)
false, // can we partially free allocated blocks? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
true // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
@ -62,6 +53,18 @@ size_t _mi_os_large_page_size(void) {
return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size());
}
size_t _mi_os_guard_page_size(void) {
const size_t gsize = _mi_os_page_size();
mi_assert(gsize <= (MI_ARENA_SLICE_SIZE/8));
return gsize;
}
size_t _mi_os_virtual_address_bits(void) {
const size_t vbits = mi_os_mem_config.virtual_address_bits;
mi_assert(vbits <= MI_MAX_VABITS);
return vbits;
}
bool _mi_os_use_large_page(size_t size, size_t alignment) {
// if we have access, check the size and alignment requirements
if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false;
@ -91,73 +94,54 @@ void _mi_os_init(void) {
bool _mi_os_decommit(void* addr, size_t size);
bool _mi_os_commit(void* addr, size_t size, bool* is_zero);
static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) {
mi_assert_internal(alignment != 0);
uintptr_t mask = alignment - 1;
if ((alignment & mask) == 0) { // power of two?
return (sz & ~mask);
}
else {
return ((sz / alignment) * alignment);
}
}
static void* mi_align_down_ptr(void* p, size_t alignment) {
return (void*)_mi_align_down((uintptr_t)p, alignment);
}
/* -----------------------------------------------------------
aligned hinting
-------------------------------------------------------------- */
// On systems with enough virtual address bits, we can do efficient aligned allocation by using
// the 2TiB to 30TiB area to allocate those. If we have at least 46 bits of virtual address
// space (64TiB) we use this technique. (but see issue #939)
#if (MI_INTPTR_SIZE >= 8) && !defined(MI_NO_ALIGNED_HINT)
static mi_decl_cache_align _Atomic(uintptr_t)aligned_base;
// Return a MI_SEGMENT_SIZE aligned address that is probably available.
// If this returns NULL, the OS will determine the address but on some OS's that may not be
// properly aligned which can be more costly as it needs to be adjusted afterwards.
// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
// in the middle of the 2TiB - 6TiB address range (see issue #372))
#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start
#define MI_HINT_AREA ((uintptr_t)4 << 40) // up to 6TiB (since before Win8 there is "only" 8TiB available to processes)
#define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size)
{
if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
if (mi_os_mem_config.virtual_address_bits < 46) return NULL; // < 64TiB virtual address space
size = _mi_align_up(size, MI_SEGMENT_SIZE);
if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
#if (MI_SECURE>0)
size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VMAs but increases guarded areas.
#endif
uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize
uintptr_t init = MI_HINT_BASE;
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode
uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap());
init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB
#endif
uintptr_t expected = hint + size;
mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
}
if (hint%try_alignment != 0) return NULL;
return (void*)hint;
}
#else
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
MI_UNUSED(try_alignment); MI_UNUSED(size);
return NULL;
}
#endif
// In secure mode, return the size of a guard page, otherwise 0
size_t _mi_os_secure_guard_page_size(void) {
#if MI_SECURE > 0
return _mi_os_guard_page_size();
#else
return 0;
#endif
}
// In secure mode, try to decommit an area and output a warning if this fails.
bool _mi_os_secure_guard_page_set_at(void* addr, bool is_pinned) {
if (addr == NULL) return true;
#if MI_SECURE > 0
const bool ok = (is_pinned ? false : _mi_os_decommit(addr, _mi_os_secure_guard_page_size()));
if (!ok) {
_mi_error_message(EINVAL, "secure level %d, but failed to commit guard page (at %p of size %zu)\n", MI_SECURE, addr, _mi_os_secure_guard_page_size());
}
return ok;
#else
MI_UNUSED(is_pinned);
return true;
#endif
}
// In secure mode, try to decommit an area and output a warning if this fails.
bool _mi_os_secure_guard_page_set_before(void* addr, bool is_pinned) {
return _mi_os_secure_guard_page_set_at((uint8_t*)addr - _mi_os_secure_guard_page_size(), is_pinned);
}
// In secure mode, try to recommit an area
bool _mi_os_secure_guard_page_reset_at(void* addr) {
if (addr == NULL) return true;
#if MI_SECURE > 0
return _mi_os_commit(addr, _mi_os_secure_guard_page_size(), NULL);
#else
return true;
#endif
}
// In secure mode, try to recommit an area
bool _mi_os_secure_guard_page_reset_before(void* addr) {
return _mi_os_secure_guard_page_reset_at((uint8_t*)addr - _mi_os_secure_guard_page_size());
}
/* -----------------------------------------------------------
Free memory
@ -186,10 +170,10 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me
void* base = addr;
// different base? (due to alignment)
if (memid.mem.os.base != base) {
mi_assert(memid.mem.os.base <= addr);
base = memid.mem.os.base;
const size_t diff = (uint8_t*)addr - (uint8_t*)memid.mem.os.base;
if (memid.mem.os.size==0) {
csize += diff;
}
if (still_committed) {
@ -236,8 +220,6 @@ static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignm
_mi_warning_message("unable to allocate OS memory (error: %d (0x%x), addr: %p, size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, hint_addr, size, try_alignment, commit, allow_large);
}
mi_os_stat_counter_increase(mmap_calls, 1);
if (p != NULL) {
mi_os_stat_increase(reserved, size);
@ -270,18 +252,24 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL;
size = _mi_align_up(size, _mi_os_page_size());
// try first with a requested alignment hint (this will usually be aligned directly on Win 10+ or BSD)
void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero);
if (p == NULL) return NULL;
// try a direct allocation if the alignment is at most the allocation granularity, or larger than 1/8th of the size.
const bool try_direct_alloc = (alignment <= mi_os_mem_config.alloc_granularity || alignment > size/8);
void* p = NULL;
if (try_direct_alloc) {
p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero);
}
// aligned already?
if (((uintptr_t)p % alignment) == 0) {
if (p != NULL && ((uintptr_t)p % alignment) == 0) {
*base = p;
}
else {
// if not aligned, free it, overallocate, and unmap around it
#if !MI_TRACK_ASAN
_mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
if (try_direct_alloc) {
_mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
}
#endif
if (p != NULL) { mi_os_prim_free(p, size, (commit ? size : 0)); }
if (size >= (SIZE_MAX - alignment)) return NULL; // overflow
@ -293,10 +281,10 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
if (p == NULL) return NULL;
// set p to the aligned part in the full region
// note: this is dangerous on Windows as VirtualFree needs the actual base pointer
// this is handled though by having the `base` field in the memid's
// note: on Windows VirtualFree needs the actual base pointer
// this is handled by having the `base` field in the memid.
*base = p; // remember the base
p = mi_align_up_ptr(p, alignment);
p = _mi_align_up_ptr(p, alignment);
// explicitly commit only the aligned part
if (commit) {
@ -309,7 +297,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
if (p == NULL) return NULL;
// and selectively unmap parts around the over-allocated area.
void* aligned_p = mi_align_up_ptr(p, alignment);
void* aligned_p = _mi_align_up_ptr(p, alignment);
size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p;
size_t mid_size = _mi_align_up(size, _mi_os_page_size());
size_t post_size = over_size - pre_size - mid_size;
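The fallback path above reserves `size + alignment`, keeps the aligned block inside it, and releases the slack on both sides. Hypothetical numbers (mine, not from the diff) make the split concrete:

#include <stdint.h>
#include <stdio.h>

static uintptr_t align_up(uintptr_t n, uintptr_t a) { return (n + a - 1) & ~(a - 1); }

int main(void) {
  const uintptr_t size = 4u << 20, alignment = 1u << 20;  // ask for 4 MiB at 1 MiB alignment
  const uintptr_t over_size = size + alignment;           // reserve 5 MiB
  const uintptr_t p = 0x12cc0000u;                        // pretend (unaligned) OS result
  const uintptr_t aligned_p = align_up(p, alignment);     // 0x12d00000
  const uintptr_t pre  = aligned_p - p;                   // released before the block
  const uintptr_t post = over_size - pre - size;          // released after the block
  printf("pre = %u KiB, kept = %u KiB, post = %u KiB\n",
         (unsigned)(pre >> 10), (unsigned)(size >> 10), (unsigned)(post >> 10));
  return 0;
}

In the real code the middle size is additionally page-aligned, and on systems that cannot partially free a mapping (Windows) the whole reservation is kept and only the aligned middle is committed, which is why the memid remembers the original base.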
@ -339,7 +327,7 @@ void* _mi_os_alloc(size_t size, mi_memid_t* memid) {
bool os_is_zero = false;
void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero);
if (p != NULL) {
*memid = _mi_memid_create_os(true, os_is_zero, os_is_large);
*memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large);
}
return p;
}
@ -355,9 +343,9 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
bool os_is_large = false;
bool os_is_zero = false;
void* os_base = NULL;
void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base );
void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base);
if (p != NULL) {
*memid = _mi_memid_create_os(commit, os_is_zero, os_is_large);
*memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large);
memid->mem.os.base = os_base;
// memid->mem.os.alignment = alignment;
memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned
@ -365,6 +353,18 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
return p;
}
void* _mi_os_zalloc(size_t size, mi_memid_t* memid) {
void* p = _mi_os_alloc(size, memid);
if (p == NULL) return NULL;
// zero the OS memory if needed
if (!memid->initially_zero) {
_mi_memzero_aligned(p, size);
memid->initially_zero = true;
}
return p;
}
/* -----------------------------------------------------------
OS aligned allocation with an offset. This is used
for large alignments > MI_BLOCK_ALIGNMENT_MAX. We use a large mimalloc
@ -374,11 +374,9 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
----------------------------------------------------------- */
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid) {
mi_assert(offset <= MI_SEGMENT_SIZE);
mi_assert(offset <= size);
mi_assert((alignment % _mi_os_page_size()) == 0);
*memid = _mi_memid_none();
if (offset > MI_SEGMENT_SIZE) return NULL;
if (offset == 0) {
// regular aligned allocation
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid);
@ -411,11 +409,11 @@ static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size,
if (newsize != NULL) *newsize = 0;
if (size == 0 || addr == NULL) return NULL;
// page align conservatively within the range
void* start = (conservative ? mi_align_up_ptr(addr, _mi_os_page_size())
// page align the range: conservatively (staying inside it), or liberally (straddling the enclosing pages)
void* start = (conservative ? _mi_align_up_ptr(addr, _mi_os_page_size())
: mi_align_down_ptr(addr, _mi_os_page_size()));
void* end = (conservative ? mi_align_down_ptr((uint8_t*)addr + size, _mi_os_page_size())
: mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size()));
: _mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size()));
ptrdiff_t diff = (uint8_t*)end - (uint8_t*)start;
if (diff <= 0) return NULL;
@ -526,7 +524,7 @@ bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size)
return needs_recommit;
}
else {
if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed
if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed (on Windows, we cannot reset uncommitted memory)
_mi_os_reset(p, size);
}
return false; // needs no recommit
@ -591,15 +589,14 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
start = huge_start;
if (start == 0) {
// Initialize the start address after the 32TiB area
start = ((uintptr_t)32 << 40); // 32TiB virtual start address
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode
start = ((uintptr_t)8 << 40); // 8TiB virtual start address
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode
uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap());
start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB
#endif
}
end = start + size;
mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
} while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));
} while (!mi_atomic_cas_weak_acq_rel(&mi_huge_start, &huge_start, end));
if (total_size != NULL) *total_size = size;
return (uint8_t*)start;
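The claim loop above hands out consecutive, non-overlapping huge-page address ranges by advancing a shared cursor with a compare-and-swap. A minimal C11 model (my names; assumes a 64-bit uintptr_t) of that pattern:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic(uintptr_t) cursor;   // like mi_huge_start: 0 until first use

static uintptr_t claim(uintptr_t size, uintptr_t init) {
  uintptr_t expected = atomic_load(&cursor);
  uintptr_t start, end;
  do {
    start = (expected == 0 ? init : expected);  // the first claimant initializes the start
    end = start + size;
  } while (!atomic_compare_exchange_weak(&cursor, &expected, end));
  return start;   // [start, start+size) now belongs to this claimant
}

int main(void) {
  const uintptr_t GiB  = (uintptr_t)1 << 30;
  const uintptr_t base = (uintptr_t)8 << 40;            // pretend 8 TiB start, as above
  uintptr_t a = claim(4*GiB, base);
  uintptr_t b = claim(2*GiB, base);
  printf("a=0x%llx b=0x%llx (disjoint)\n", (unsigned long long)a, (unsigned long long)b);
  return 0;
}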
@ -612,7 +609,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
}
#endif
// Allocate MI_SEGMENT_SIZE aligned huge pages
// Allocate MI_ARENA_SLICE_ALIGN aligned huge pages
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) {
*memid = _mi_memid_none();
if (psize != NULL) *psize = 0;
@ -674,7 +671,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; }
if (page != 0) {
mi_assert(start != NULL);
*memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */);
*memid = _mi_memid_create_os(start, *psize, true /* is committed */, all_zero, true /* is_large */);
memid->memkind = MI_MEM_OS_HUGE;
mi_assert(memid->is_pinned);
#ifdef MI_TRACK_ASAN
@ -727,3 +724,49 @@ int _mi_os_numa_node_get(void) {
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
return (int)numa_node;
}
/* ----------------------------------------------------------------------------
Public API
-----------------------------------------------------------------------------*/
#if 0
mi_decl_export void* mi_os_alloc(size_t size, bool commit, size_t* full_size) {
return mi_os_alloc_aligned(size, mi_os_mem_config.alloc_granularity, commit, NULL, full_size);
}
static void* mi_os_alloc_aligned_ex(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_committed, bool* is_pinned, void** base, size_t* full_size) {
mi_memid_t memid = _mi_memid_none();
void* p = _mi_os_alloc_aligned(size, alignment, commit, allow_large, &memid);
if (p == NULL) return p;
if (is_committed != NULL) { *is_committed = memid.initially_committed; }
if (is_pinned != NULL) { *is_pinned = memid.is_pinned; }
if (base != NULL) { *base = memid.mem.os.base; }
if (full_size != NULL) { *full_size = memid.mem.os.size; }
if (!memid.initially_zero && memid.initially_committed) {
_mi_memzero_aligned(memid.mem.os.base, memid.mem.os.size);
}
return p;
}
mi_decl_export void* mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, void** base, size_t* full_size) {
return mi_os_alloc_aligned_ex(size, alignment, commit, false, NULL, NULL, base, full_size);
}
mi_decl_export void* mi_os_alloc_aligned_allow_large(size_t size, size_t alignment, bool commit, bool* is_committed, bool* is_pinned, void** base, size_t* full_size) {
return mi_os_alloc_aligned_ex(size, alignment, commit, true, is_committed, is_pinned, base, full_size);
}
mi_decl_export void mi_os_free(void* p, size_t size) {
if (p==NULL || size == 0) return;
mi_memid_t memid = _mi_memid_create_os(p, size, true, false, false);
_mi_os_free(p, size, memid);
}
mi_decl_export void mi_os_commit(void* p, size_t size) {
_mi_os_commit(p, size, NULL);
}
mi_decl_export void mi_os_decommit(void* p, size_t size) {
_mi_os_decommit(p, size);
}
#endif

329
src/page-map.c Normal file
View file

@ -0,0 +1,329 @@
/*----------------------------------------------------------------------------
Copyright (c) 2023-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "bitmap.h"
#if MI_PAGE_MAP_FLAT
// The page-map contains a byte for each 64kb slice in the address space.
// For an address `a` where `ofs = _mi_page_map[a >> 16]`:
// 0 = unused
// 1 = the slice at `a & ~0xFFFF` is the start of a mimalloc page.
// 1 < ofs <= 127 = the slice is part of a page, starting at `(((a>>16) - ofs + 1) << 16)`.
//
// 1 byte per slice => 1 TiB of address space needs a 2^(40-16) = 16 MiB page map.
// A full 256 TiB address space (48 bit) needs a 4 GiB page map.
// A full 4 GiB address space (32 bit) needs only a 64 KiB page map.
mi_decl_cache_align uint8_t* _mi_page_map = NULL;
static void* mi_page_map_max_address = NULL;
static mi_memid_t mi_page_map_memid;
#define MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT MI_ARENA_SLICE_SIZE
static mi_bitmap_t* mi_page_map_commit; // one commit bit per MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT page-map entries (64 KiB of the map)
static void mi_page_map_ensure_committed(size_t idx, size_t slice_count);
bool _mi_page_map_init(void) {
size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS);
if (vbits == 0) {
vbits = _mi_os_virtual_address_bits();
#if MI_ARCH_X64 // canonical address is limited to the first 128 TiB
if (vbits >= 48) { vbits = 47; }
#endif
}
// Allocate the page map and commit bits
mi_page_map_max_address = (void*)(MI_PU(1) << vbits);
const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_SLICE_SHIFT));
const bool commit = (page_map_size <= 1*MI_MiB || mi_option_is_enabled(mi_option_pagemap_commit)); // _mi_os_has_overcommit(); // commit on-access on Linux systems?
const size_t commit_bits = _mi_divide_up(page_map_size, MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT);
const size_t bitmap_size = (commit ? 0 : mi_bitmap_size(commit_bits, NULL));
const size_t reserve_size = bitmap_size + page_map_size;
uint8_t* const base = (uint8_t*)_mi_os_alloc_aligned(reserve_size, 1, commit, true /* allow large */, &mi_page_map_memid);
if (base==NULL) {
_mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
return false;
}
if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) {
_mi_warning_message("internal: the page map was committed but not zero initialized!\n");
_mi_memzero_aligned(base, reserve_size);
}
if (bitmap_size > 0) {
mi_page_map_commit = (mi_bitmap_t*)base;
_mi_os_commit(mi_page_map_commit, bitmap_size, NULL);
mi_bitmap_init(mi_page_map_commit, commit_bits, true);
}
_mi_page_map = base + bitmap_size;
// commit the first part so NULL pointers get resolved without an access violation
if (!commit) {
mi_page_map_ensure_committed(0, 1);
}
_mi_page_map[0] = 1; // so _mi_ptr_page(NULL) == NULL
mi_assert_internal(_mi_ptr_page(NULL)==NULL);
return true;
}
static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
// is the page map area that contains the page address committed?
// we always set the commit bits so we can track what ranges are in-use.
// we only actually commit if the map wasn't committed fully already.
if (mi_page_map_commit != NULL) {
const size_t commit_idx = idx / MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT;
const size_t commit_idx_hi = (idx + slice_count - 1) / MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT;
for (size_t i = commit_idx; i <= commit_idx_hi; i++) { // per bit to avoid crossing over bitmap chunks
if (mi_bitmap_is_clear(mi_page_map_commit, i)) {
// this may race, in which case we do multiple commits (which is ok)
bool is_zero;
uint8_t* const start = _mi_page_map + (i * MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT);
const size_t size = MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT;
_mi_os_commit(start, size, &is_zero);
if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(start, size); }
mi_bitmap_set(mi_page_map_commit, i);
}
}
}
#if MI_DEBUG > 0
_mi_page_map[idx] = 0;
_mi_page_map[idx+slice_count-1] = 0;
#endif
}
static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* slice_count) {
size_t page_size;
*page_start = mi_page_area(page, &page_size);
if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE - MI_ARENA_SLICE_SIZE; } // furthest interior pointer
*slice_count = mi_slice_count_of_size(page_size) + (((uint8_t*)*page_start - (uint8_t*)page)/MI_ARENA_SLICE_SIZE); // add for large aligned blocks
return _mi_page_map_index(page);
}
void _mi_page_map_register(mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_page_map != NULL); // should be initialized before multi-thread access!
if mi_unlikely(_mi_page_map == NULL) {
if (!_mi_page_map_init()) return;
}
mi_assert(_mi_page_map!=NULL);
uint8_t* page_start;
size_t slice_count;
const size_t idx = mi_page_map_get_idx(page, &page_start, &slice_count);
mi_page_map_ensure_committed(idx, slice_count);
// set the offsets
for (size_t i = 0; i < slice_count; i++) {
mi_assert_internal(i < 128);
_mi_page_map[idx + i] = (uint8_t)(i+1);
}
}
void _mi_page_map_unregister(mi_page_t* page) {
mi_assert_internal(_mi_page_map != NULL);
// get index and count
uint8_t* page_start;
size_t slice_count;
const size_t idx = mi_page_map_get_idx(page, &page_start, &slice_count);
// unset the offsets
_mi_memzero(_mi_page_map + idx, slice_count);
}
void _mi_page_map_unregister_range(void* start, size_t size) {
const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE);
const uintptr_t index = _mi_page_map_index(start);
mi_page_map_ensure_committed(index, slice_count); // we commit the range in total; todo: scan the commit bits and clear only those ranges?
_mi_memzero(&_mi_page_map[index], slice_count);
}
mi_page_t* _mi_safe_ptr_page(const void* p) {
if mi_unlikely(p >= mi_page_map_max_address) return NULL;
const uintptr_t idx = _mi_page_map_index(p);
if mi_unlikely(mi_page_map_commit != NULL && !mi_bitmap_is_set(mi_page_map_commit, idx/MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT)) return NULL;
const uintptr_t ofs = _mi_page_map[idx];
if mi_unlikely(ofs == 0) return NULL;
return (mi_page_t*)((((uintptr_t)p >> MI_ARENA_SLICE_SHIFT) - ofs + 1) << MI_ARENA_SLICE_SHIFT);
}
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
return (_mi_safe_ptr_page(p) != NULL);
}
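The encoding documented at the top of this file can be exercised in isolation: registering a page writes 1,2,3,... into the slices it covers, and any interior pointer recovers the page start by stepping back `ofs - 1` slices. A tiny standalone model (my names, toy-sized map):

#include <stdint.h>
#include <stdio.h>

#define SLICE_SHIFT 16   // 64 KiB slices, as in the flat map above

int main(void) {
  uint8_t map[16] = {0};                              // toy page map covering 16 slices
  const size_t page_slice = 4, slice_count = 3;       // a page covering slices 4,5,6
  for (size_t i = 0; i < slice_count; i++) {
    map[page_slice + i] = (uint8_t)(i + 1);           // as in _mi_page_map_register
  }
  const uintptr_t addr = ((uintptr_t)6 << SLICE_SHIFT) + 0x1234;   // interior pointer in slice 6
  const size_t idx = (size_t)(addr >> SLICE_SHIFT);
  const uint8_t ofs = map[idx];
  const uintptr_t page_start = (uintptr_t)(idx - ofs + 1) << SLICE_SHIFT;  // back to slice 4
  printf("slice %zu, ofs %u -> page starts at 0x%lx\n",
         idx, (unsigned)ofs, (unsigned long)page_start);
  return 0;
}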
#else
// A 2-level page map
#define MI_PAGE_MAP_SUB_SIZE (MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*))
mi_decl_cache_align mi_page_t*** _mi_page_map;
static void* mi_page_map_max_address;
static mi_memid_t mi_page_map_memid;
static _Atomic(mi_bfield_t) mi_page_map_commit;
static mi_page_t** mi_page_map_ensure_committed(size_t idx);
static mi_page_t** mi_page_map_ensure_at(size_t idx);
static inline void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count);
bool _mi_page_map_init(void) {
size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS);
if (vbits == 0) {
vbits = _mi_os_virtual_address_bits();
#if MI_ARCH_X64 // canonical address is limited to the first 128 TiB
if (vbits >= 48) { vbits = 47; }
#endif
}
// Allocate the page map and commit bits
mi_assert(MI_MAX_VABITS >= vbits);
mi_page_map_max_address = (void*)(MI_PU(1) << vbits);
const size_t page_map_count = (MI_ZU(1) << (vbits - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT));
mi_assert(page_map_count <= MI_PAGE_MAP_COUNT);
const size_t os_page_size = _mi_os_page_size();
const size_t page_map_size = _mi_align_up( page_map_count * sizeof(mi_page_t**), os_page_size);
const size_t reserve_size = page_map_size + os_page_size;
const bool commit = page_map_size <= 64*MI_KiB ||
mi_option_is_enabled(mi_option_pagemap_commit) || _mi_os_has_overcommit();
_mi_page_map = (mi_page_t***)_mi_os_alloc_aligned(reserve_size, 1, commit, true /* allow large */, &mi_page_map_memid);
if (_mi_page_map==NULL) {
_mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
return false;
}
if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) {
_mi_warning_message("internal: the page map was committed but not zero initialized!\n");
_mi_memzero_aligned(_mi_page_map, page_map_size);
}
mi_atomic_store_release(&mi_page_map_commit, (commit ? ~MI_ZU(0) : MI_ZU(0)));
// note: for the NULL range we only commit one OS page (in the map and sub)
if (!mi_page_map_memid.initially_committed) {
_mi_os_commit(&_mi_page_map[0], os_page_size, NULL); // commit first part of the map
}
_mi_page_map[0] = (mi_page_t**)((uint8_t*)_mi_page_map + page_map_size); // we reserved 2 subs at the end already
if (!mi_page_map_memid.initially_committed) {
_mi_os_commit(_mi_page_map[0], os_page_size, NULL); // only first OS page
}
_mi_page_map[0][0] = (mi_page_t*)&_mi_page_empty; // caught in `mi_free`
mi_assert_internal(_mi_ptr_page(NULL)==&_mi_page_empty);
return true;
}
#define MI_PAGE_MAP_ENTRIES_PER_CBIT (MI_PAGE_MAP_COUNT / MI_BFIELD_BITS)
static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx) {
mi_bfield_t commit = mi_atomic_load_relaxed(&mi_page_map_commit);
const size_t bit_idx = idx/MI_PAGE_MAP_ENTRIES_PER_CBIT;
mi_assert_internal(bit_idx < MI_BFIELD_BITS);
if (pbit_idx != NULL) { *pbit_idx = bit_idx; }
return ((commit & (MI_ZU(1) << bit_idx)) != 0);
}
static mi_page_t** mi_page_map_ensure_committed(size_t idx) {
size_t bit_idx;
if mi_unlikely(!mi_page_map_is_committed(idx, &bit_idx)) {
uint8_t* start = (uint8_t*)&_mi_page_map[bit_idx * MI_PAGE_MAP_ENTRIES_PER_CBIT];
_mi_os_commit(start, MI_PAGE_MAP_ENTRIES_PER_CBIT * sizeof(mi_page_t**), NULL);
mi_atomic_or_acq_rel(&mi_page_map_commit, MI_ZU(1) << bit_idx);
}
return _mi_page_map[idx];
}
static mi_page_t** mi_page_map_ensure_at(size_t idx) {
mi_page_t** sub = mi_page_map_ensure_committed(idx);
if mi_unlikely(sub == NULL) {
// sub map not yet allocated, alloc now
mi_memid_t memid;
sub = (mi_page_t**)_mi_os_alloc(MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*), &memid);
mi_page_t** expect = NULL;
if (!mi_atomic_cas_strong_acq_rel(((_Atomic(mi_page_t**)*)&_mi_page_map[idx]), &expect, sub)) {
// another thread already allocated it.. free and continue
_mi_os_free(sub, MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*), memid);
sub = expect;
mi_assert_internal(sub!=NULL);
}
if (sub == NULL) {
_mi_error_message(EFAULT, "internal error: unable to extend the page map\n");
}
}
return sub;
}
static void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
// is the page map area that contains the page address committed?
while (slice_count > 0) {
mi_page_t** sub = mi_page_map_ensure_at(idx);
// set the offsets for the page
while (sub_idx < MI_PAGE_MAP_SUB_COUNT) {
sub[sub_idx] = page;
slice_count--; if (slice_count == 0) return;
sub_idx++;
}
idx++; // potentially wrap around to the next idx
sub_idx = 0;
}
}
static size_t mi_page_map_get_idx(mi_page_t* page, size_t* sub_idx, size_t* slice_count) {
size_t page_size;
uint8_t* page_start = mi_page_area(page, &page_size);
if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE - MI_ARENA_SLICE_SIZE; } // furthest interior pointer
*slice_count = mi_slice_count_of_size(page_size) + ((page_start - (uint8_t*)page)/MI_ARENA_SLICE_SIZE); // add for large aligned blocks
return _mi_page_map_index(page, sub_idx);
}
void _mi_page_map_register(mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_page_map != NULL); // should be initialized before multi-thread access!
if mi_unlikely(_mi_page_map == NULL) {
if (!_mi_page_map_init()) return;
}
mi_assert(_mi_page_map!=NULL);
size_t slice_count;
size_t sub_idx;
const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count);
mi_page_map_set_range(page, idx, sub_idx, slice_count);
}
void _mi_page_map_unregister(mi_page_t* page) {
mi_assert_internal(_mi_page_map != NULL);
// get index and count
size_t slice_count;
size_t sub_idx;
const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count);
// unset the offsets
mi_page_map_set_range(NULL, idx, sub_idx, slice_count);
}
void _mi_page_map_unregister_range(void* start, size_t size) {
const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE);
size_t sub_idx;
const uintptr_t idx = _mi_page_map_index(start, &sub_idx);
mi_page_map_set_range(NULL, idx, sub_idx, slice_count); // todo: avoid committing if not already committed?
}
mi_page_t* _mi_safe_ptr_page(const void* p) {
if mi_unlikely(p >= mi_page_map_max_address) return NULL;
size_t sub_idx;
const size_t idx = _mi_page_map_index(p,&sub_idx);
if mi_unlikely(!mi_page_map_is_committed(idx,NULL)) return NULL;
mi_page_t** const sub = _mi_page_map[idx];
if mi_unlikely(sub==NULL) return NULL;
return sub[sub_idx];
}
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
return (_mi_safe_ptr_page(p) != NULL);
}
#endif
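The two-level variant replaces the flat byte array with a root table of sub-maps, so only sub-maps for address ranges actually in use need backing memory. A rough standalone model (my names; the real MI_PAGE_MAP_SUB_SHIFT/COUNT constants live in headers not shown in this diff):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define SLICE_SHIFT 16                  // 64 KiB slices (assumed)
#define SUB_SHIFT   10                  // 1024 entries per sub-map (assumed, for illustration)
#define SUB_COUNT   ((size_t)1 << SUB_SHIFT)

typedef struct toy_page_s { int id; } toy_page_t;

static toy_page_t** root[1u << 14];     // toy root table

static toy_page_t* lookup(uintptr_t addr) {
  const size_t slice   = (size_t)(addr >> SLICE_SHIFT);
  const size_t idx     = slice >> SUB_SHIFT;        // index into the root table
  const size_t sub_idx = slice & (SUB_COUNT - 1);   // index inside the sub-map
  toy_page_t** sub = root[idx];
  return (sub == NULL ? NULL : sub[sub_idx]);       // NULL sub => not a known page
}

int main(void) {
  static toy_page_t the_page = { 42 };
  toy_page_t** sub = calloc(SUB_COUNT, sizeof(toy_page_t*));
  if (sub == NULL) return 1;
  root[3] = sub;
  sub[7] = &the_page;                               // register one slice
  const uintptr_t addr = ((uintptr_t)((3u << SUB_SHIFT) + 7) << SLICE_SHIFT) + 0x80;
  printf("hit: %d, miss: %d\n", lookup(addr) == &the_page, lookup(0x1234) == NULL);
  return 0;
}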

View file

@ -12,7 +12,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MI_IN_PAGE_C
#error "this file should be included from 'page.c'"
// include to help an IDE
#include "mimalloc.h"
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#endif
@ -38,15 +38,15 @@ terms of the MIT license. A copy of the license can be found in the file
static inline bool mi_page_queue_is_huge(const mi_page_queue_t* pq) {
return (pq->block_size == (MI_LARGE_OBJ_SIZE_MAX+sizeof(uintptr_t)));
return (pq->block_size == (MI_LARGE_MAX_OBJ_SIZE+sizeof(uintptr_t)));
}
static inline bool mi_page_queue_is_full(const mi_page_queue_t* pq) {
return (pq->block_size == (MI_LARGE_OBJ_SIZE_MAX+(2*sizeof(uintptr_t))));
return (pq->block_size == (MI_LARGE_MAX_OBJ_SIZE+(2*sizeof(uintptr_t))));
}
static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) {
return (pq->block_size > MI_LARGE_OBJ_SIZE_MAX);
return (pq->block_size > MI_LARGE_MAX_OBJ_SIZE);
}
/* -----------------------------------------------------------
@ -76,7 +76,7 @@ static inline uint8_t mi_bin(size_t size) {
bin = (uint8_t)wsize;
}
#endif
else if (wsize > MI_LARGE_OBJ_WSIZE_MAX) {
else if (wsize > MI_LARGE_MAX_OBJ_WSIZE) {
bin = MI_BIN_HUGE;
}
else {
@ -84,8 +84,9 @@ static inline uint8_t mi_bin(size_t size) {
if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
#endif
wsize--;
// find the highest bit
uint8_t b = (uint8_t)mi_bsr(wsize); // note: wsize != 0
mi_assert_internal(wsize!=0);
// find the highest bit position
uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize));
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we do not round the first 8 sizes
// which each get an exact bin
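The bin computation takes the position of the highest bit plus the next two bits, so every power-of-two size range splits into four bins and the slack inside a bin stays roughly an eighth of the request. A standalone sketch (my names; only the wsize > 8 path, without the padding and word-alignment adjustments above):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

// size classes for wsize > 8 words: 4 bins per power-of-two range
static uint8_t toy_bin(size_t wsize) {
  wsize--;
  uint8_t b = (uint8_t)(sizeof(size_t)*8 - 1);
  while (((wsize >> b) & 1) == 0) b--;              // highest set bit position
  return (uint8_t)((b << 2) + ((wsize >> (b - 2)) & 0x03) - 3);
}

int main(void) {
  const size_t wsizes[] = { 9, 12, 13, 16, 17, 24, 32, 100 };
  for (size_t i = 0; i < sizeof(wsizes)/sizeof(wsizes[0]); i++) {
    printf("wsize %3zu words -> bin %u\n", wsizes[i], toy_bin(wsizes[i]));
  }
  return 0;
}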
@ -111,8 +112,8 @@ size_t _mi_bin_size(uint8_t bin) {
}
// Good size for allocation
size_t mi_good_size(size_t size) mi_attr_noexcept {
if (size <= MI_LARGE_OBJ_SIZE_MAX) {
mi_decl_nodiscard mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept {
if (size <= MI_LARGE_MAX_OBJ_SIZE) {
return _mi_bin_size(mi_bin(size + MI_PADDING_SIZE));
}
else {
@ -210,8 +211,8 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) {
static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_expensive(mi_page_queue_contains(queue, page));
mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
(mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_heap_t* heap = mi_page_heap(page);
if (page->prev != NULL) page->prev->next = page->next;
@ -226,7 +227,6 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
heap->page_count--;
page->next = NULL;
page->prev = NULL;
// mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), NULL);
mi_page_set_in_full(page,false);
}
@ -242,7 +242,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_page_set_in_full(page, mi_page_queue_is_full(queue));
// mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), heap);
page->next = queue->first;
page->prev = NULL;
if (queue->first != NULL) {
@ -259,6 +259,34 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
heap->page_count++;
}
static void mi_page_queue_push_at_end(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(!mi_page_queue_contains(queue, page));
mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
(mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_page_set_in_full(page, mi_page_queue_is_full(queue));
page->prev = queue->last;
page->next = NULL;
if (queue->last != NULL) {
mi_assert_internal(queue->last->next == NULL);
queue->last->next = page;
queue->last = page;
}
else {
queue->first = queue->last = page;
}
// update direct
if (queue->first == page) {
mi_heap_queue_first_update(heap, queue);
}
heap->page_count++;
}
static void mi_page_queue_move_to_front(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(mi_page_queue_contains(queue, page));
@ -317,8 +345,8 @@ static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t*
page->prev = to->first;
page->next = next;
to->first->next = page;
if (next != NULL) {
next->prev = page;
}
else {
to->last = page;
@ -356,13 +384,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue
// set append pages to new heap and count
size_t count = 0;
for (mi_page_t* page = append->first; page != NULL; page = page->next) {
// inline `mi_page_set_heap` to avoid wrong assertion during absorption;
// in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive.
mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
// set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a
// side effect that it spins until any DELAYED_FREEING is finished. This ensures
// that after appending only the new heap will be used for delayed free operations.
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false);
mi_page_set_heap(page, heap);
count++;
}

View file

@ -36,14 +36,15 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta
return (mi_block_t*)((uint8_t*)page_start + (i * block_size));
}
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld);
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld);
//static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld);
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page);
#if (MI_DEBUG>=3)
static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) {
mi_assert_internal(_mi_ptr_page(page) == page);
size_t count = 0;
while (head != NULL) {
mi_assert_internal(page == _mi_ptr_page(head));
mi_assert_internal((uint8_t*)head - (uint8_t*)page > (ptrdiff_t)MI_LARGE_PAGE_SIZE || page == _mi_ptr_page(head));
count++;
head = mi_block_next(page, head);
}
@ -59,7 +60,7 @@ static inline uint8_t* mi_page_area(const mi_page_t* page) {
static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) {
size_t psize;
uint8_t* page_area = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
uint8_t* page_area = mi_page_area(page, &psize);
mi_block_t* start = (mi_block_t*)page_area;
mi_block_t* end = (mi_block_t*)(page_area + psize);
while(p != NULL) {
@ -83,10 +84,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
mi_assert_internal(page->capacity <= page->reserved);
// const size_t bsize = mi_page_block_size(page);
mi_segment_t* segment = _mi_page_segment(page);
uint8_t* start = mi_page_start(page);
mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL));
mi_assert_internal(page->is_huge == (segment->page_kind == MI_PAGE_HUGE));
// uint8_t* start = mi_page_start(page);
//mi_assert_internal(start + page->capacity*page->block_size == page->top);
mi_assert_internal(mi_page_list_is_valid(page,page->free));
@ -121,64 +119,25 @@ bool _mi_page_is_valid(mi_page_t* page) {
#if MI_SECURE
mi_assert_internal(page->keys[0] != 0);
#endif
if (mi_page_heap(page)!=NULL) {
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0);
#if MI_HUGE_PAGE_ABANDON
if (segment->page_kind != MI_PAGE_HUGE)
#endif
if (!mi_page_is_abandoned(page)) {
//mi_assert_internal(!_mi_process_is_initialized);
{
mi_page_queue_t* pq = mi_page_queue_of(page);
mi_assert_internal(mi_page_queue_contains(pq, page));
mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page));
mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq));
mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_is_huge(page) || mi_page_is_in_full(page));
// mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq));
}
}
return true;
}
#endif
void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
while (!_mi_page_try_use_delayed_free(page, delay, override_never)) {
mi_atomic_yield();
}
}
bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
mi_thread_free_t tfreex;
mi_delayed_t old_delay;
mi_thread_free_t tfree;
size_t yield_count = 0;
do {
tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS;
tfreex = mi_tf_set_delayed(tfree, delay);
old_delay = mi_tf_delayed(tfree);
if mi_unlikely(old_delay == MI_DELAYED_FREEING) {
if (yield_count >= 4) return false; // give up after 4 tries
yield_count++;
mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done.
// tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail
}
else if (delay == old_delay) {
break; // avoid atomic operation if already equal
}
else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) {
break; // leave never-delayed flag set
}
} while ((old_delay == MI_DELAYED_FREEING) ||
!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
return true; // success
}
/* -----------------------------------------------------------
Page collect the `local_free` and `thread_free` lists
----------------------------------------------------------- */
// Collect the local `thread_free` list using an atomic exchange.
// Note: The exchange must be done atomically as this is used right after
// moving to the full list in `mi_page_collect_ex` and we need to
// ensure that there was no race where the page became unfull just before the move.
static void _mi_page_thread_free_collect(mi_page_t* page)
{
mi_block_t* head;
@ -186,21 +145,21 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
head = mi_tf_block(tfree);
tfreex = mi_tf_set_block(tfree,NULL);
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex));
// return if the list is empty
if (head == NULL) return;
if (head == NULL) return; // return if the list is empty
tfreex = mi_tf_create(NULL,mi_tf_is_owned(tfree)); // set the thread free list to NULL
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex)); // release is enough?
mi_assert_internal(head != NULL);
// find the tail -- also to get a proper count (without data races)
size_t max_count = page->capacity; // cannot collect more than capacity
size_t count = 1;
mi_block_t* tail = head;
mi_block_t* next;
while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) {
while( (next = mi_block_next(page,tail)) != NULL && count <= max_count) {
count++;
tail = next;
}
// if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free)
if (count > max_count) {
_mi_error_message(EFAULT, "corrupted thread-free list\n");
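The collection step above relies on one atomic idea: detach the whole cross-thread free list in a single atomic operation, after which the owning thread can walk it without races (the count-vs-capacity check then only guards against corrupted links). A minimal C11 sketch (my names; the real code keeps an ownership flag in the same word, which this omits):

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

typedef struct node_s { struct node_s* next; } node_t;

static _Atomic(node_t*) thread_free;   // like page->xthread_free (minus the flag bits)

static void remote_free(node_t* n) {   // a non-owning thread pushes a freed block
  node_t* old = atomic_load(&thread_free);
  do { n->next = old; }
  while (!atomic_compare_exchange_weak(&thread_free, &old, n));
}

static size_t owner_collect(void) {    // the owning thread takes the whole list at once
  node_t* head = atomic_exchange(&thread_free, NULL);
  size_t count = 0;
  for (node_t* n = head; n != NULL; n = n->next) count++;   // private walk, no races
  return count;
}

int main(void) {
  node_t a, b, c;
  remote_free(&a); remote_free(&b); remote_free(&c);
  printf("collected %zu blocks\n", owner_collect());   // prints 3
  return 0;
}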
@ -219,9 +178,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
mi_assert_internal(page!=NULL);
// collect the thread free list
if (force || mi_page_thread_free(page) != NULL) { // quick test to avoid an atomic operation
_mi_page_thread_free_collect(page);
}
_mi_page_thread_free_collect(page);
// and the local free list
if (page->local_free != NULL) {
@ -254,43 +211,83 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
Page fresh and retire
----------------------------------------------------------- */
/*
// called from segments when reclaiming abandoned pages
void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
// mi_page_set_heap(page, heap);
// _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
_mi_page_free_collect(page, false); // ensure used count is up to date
mi_assert_expensive(mi_page_is_valid_init(page));
mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
#if MI_HUGE_PAGE_ABANDON
mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
#endif
// mi_assert_internal(mi_page_heap(page) == heap);
// mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
// TODO: push on full queue immediately if it is full?
mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
mi_page_queue_push(heap, pq, page);
mi_assert_expensive(_mi_page_is_valid(page));
}
*/
// called from `mi_free` on a reclaim, and fresh_alloc if we get an abandoned page
void _mi_heap_page_reclaim(mi_heap_t* heap, mi_page_t* page)
{
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_is_abandoned(page));
mi_page_set_heap(page,heap);
_mi_page_free_collect(page, false); // ensure used count is up to date
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
mi_page_queue_push_at_end(heap, pq, page);
mi_assert_expensive(_mi_page_is_valid(page));
}
void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
_mi_page_free_collect(page, false); // ensure used count is up to date
if (mi_page_all_free(page)) {
_mi_page_free(page, pq);
}
else {
mi_page_queue_remove(pq, page);
mi_tld_t* tld = page->heap->tld;
mi_page_set_heap(page, NULL);
_mi_arenas_page_abandon(page);
_mi_arenas_collect(false, false, tld); // allow purging
}
}
// allocate a fresh page from a segment
static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size, size_t page_alignment) {
#if !MI_HUGE_PAGE_ABANDON
mi_assert_internal(pq != NULL);
mi_assert_internal(mi_heap_contains_queue(heap, pq));
mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_OBJ_SIZE_MAX || block_size == pq->block_size);
#endif
mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments);
mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_MAX_OBJ_SIZE || block_size == pq->block_size);
#endif
mi_page_t* page = _mi_arenas_page_alloc(heap, block_size, page_alignment);
if (page == NULL) {
// this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
// out-of-memory
return NULL;
}
#if MI_HUGE_PAGE_ABANDON
mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
#endif
mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size);
// a fresh page was found, initialize it
const size_t full_block_size = (pq == NULL || mi_page_is_huge(page) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc
mi_assert_internal(full_block_size >= block_size);
mi_page_init(heap, page, full_block_size, heap->tld);
if (mi_page_is_abandoned(page)) {
_mi_heap_page_reclaim(heap, page);
if (!mi_page_immediate_available(page)) {
if (mi_page_is_expandable(page)) {
mi_page_extend_free(heap, page);
}
else {
mi_assert(false); // should not happen?
return NULL;
}
}
}
else if (pq != NULL) {
mi_page_queue_push(heap, pq, page);
}
mi_heap_stat_increase(heap, pages, 1);
if (pq != NULL) { mi_page_queue_push(heap, pq, page); }
mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size);
mi_assert_expensive(_mi_page_is_valid(page));
return page;
}
@ -301,55 +298,21 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size, 0);
if (page==NULL) return NULL;
mi_assert_internal(pq->block_size==mi_page_block_size(page));
mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page)));
mi_assert_internal(pq==mi_heap_page_queue_of(heap, page));
return page;
}
/* -----------------------------------------------------------
Do any delayed frees
(put there by other threads if they deallocated in a full page)
----------------------------------------------------------- */
void _mi_heap_delayed_free_all(mi_heap_t* heap) {
while (!_mi_heap_delayed_free_partial(heap)) {
mi_atomic_yield();
}
}
// returns true if all delayed frees were processed
bool _mi_heap_delayed_free_partial(mi_heap_t* heap) {
// take over the list (note: no atomic exchange since it is often NULL)
mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ };
bool all_freed = true;
// and free them all
while(block != NULL) {
mi_block_t* next = mi_block_nextx(heap,block, heap->keys);
// use internal free instead of regular one to keep stats etc correct
if (!_mi_free_delayed_block(block)) {
// we might already start delayed freeing while another thread has not yet
// reset the delayed_freeing flag; in that case delay it further by reinserting the current block
// into the delayed free list
all_freed = false;
mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
do {
mi_block_set_nextx(heap, block, dfree, heap->keys);
} while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
}
block = next;
}
return all_freed;
}
/* -----------------------------------------------------------
Unfull, abandon, free and retire
----------------------------------------------------------- */
// Move a page from the full list back to a regular list
// Move a page from the full list back to a regular list (called from thread-local mi_free)
void _mi_page_unfull(mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(mi_page_is_in_full(page));
mi_assert_internal(!mi_page_heap(page)->allow_page_abandon);
if (!mi_page_is_in_full(page)) return;
mi_heap_t* heap = mi_page_heap(page);
@ -365,85 +328,40 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_internal(!mi_page_immediate_available(page));
mi_assert_internal(!mi_page_is_in_full(page));
if (mi_page_is_in_full(page)) return;
mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page);
_mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set
}
// Abandon a page with used blocks at the end of a thread.
// Note: only call if it is ensured that no references exist from
// the `page->heap->thread_delayed_free` into this page.
// Currently only called through `mi_heap_collect_ex` which ensures this.
void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(pq == mi_page_queue_of(page));
mi_assert_internal(mi_page_heap(page) != NULL);
mi_heap_t* pheap = mi_page_heap(page);
// remove from our page list
mi_segments_tld_t* segments_tld = &pheap->tld->segments;
mi_page_queue_remove(pq, page);
// page is no longer associated with our heap
mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
mi_page_set_heap(page, NULL);
#if (MI_DEBUG>1) && !MI_TRACK_ENABLED
// check there are no references left..
for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->keys)) {
mi_assert_internal(_mi_ptr_page(block) != page);
}
#endif
// and abandon it
mi_assert_internal(mi_page_heap(page) == NULL);
_mi_segment_page_abandon(page,segments_tld);
}
// force abandon a page
void _mi_page_force_abandon(mi_page_t* page) {
mi_heap_t* heap = mi_page_heap(page);
// mark page as not using delayed free
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
// ensure this page is no longer in the heap delayed free list
_mi_heap_delayed_free_all(heap);
// We can still access the page meta-info even if it is freed as we ensure
// in `mi_segment_force_abandon` that the segment is not freed (yet)
if (page->capacity == 0) return; // it may have been freed now
// and now unlink it from the page queue and abandon (or free)
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
if (mi_page_all_free(page)) {
_mi_page_free(page, pq, false);
}
else {
if (heap->allow_page_abandon) {
// abandon full pages
_mi_page_abandon(page, pq);
}
else {
// put full pages in a heap local queue
if (mi_page_is_in_full(page)) return;
mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page);
_mi_page_free_collect(page, false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set
}
}
// Free a page with no more free blocks
void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(pq == mi_page_queue_of(page));
mi_assert_internal(mi_page_all_free(page));
mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING);
// mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING);
// no more aligned blocks in here
mi_page_set_has_aligned(page, false);
// remove from the page list
// (no need to do _mi_heap_delayed_free first as all blocks are already free)
mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments;
mi_page_queue_remove(pq, page);
// and free it
mi_heap_t* heap = page->heap;
mi_page_set_heap(page,NULL);
_mi_segment_page_free(page, force, segments_tld);
_mi_arenas_page_free(page);
_mi_arenas_collect(false, false, heap->tld); // allow purging
}
#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE
@ -473,9 +391,9 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
const size_t bsize = mi_page_block_size(page);
if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue?
if (pq->last==page && pq->first==page) { // the only page in the queue?
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
mi_heap_t* heap = mi_page_heap(page);
mi_debug_heap_stat_counter_increase(heap, page_no_retire, 1);
page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
mi_assert_internal(pq >= heap->pages);
const size_t index = pq - heap->pages;
mi_assert_internal(index < MI_BIN_FULL && index < MI_BIN_HUGE);
@ -486,7 +404,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
}
}
#endif
_mi_page_free(page, pq, false);
_mi_page_free(page, pq);
}
// free retired pages: we don't need to look at the entire queues
@ -501,7 +419,7 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
if (mi_page_all_free(page)) {
page->retire_expire--;
if (force || page->retire_expire == 0) {
_mi_page_free(pq->first, pq, force);
_mi_page_free(pq->first, pq);
}
else {
// keep retired, update min/max
@ -519,6 +437,36 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
}
static void mi_heap_collect_full_pages(mi_heap_t* heap) {
// note: normally full pages get immediately abandoned and the full queue is always empty
// this path is only used if abandoning is disabled due to a destroy-able heap or options
// set by the user.
mi_page_queue_t* pq = &heap->pages[MI_BIN_FULL];
for (mi_page_t* page = pq->first; page != NULL; ) {
mi_page_t* next = page->next; // get next in case we free the page
_mi_page_free_collect(page, false); // register concurrent free's
// no longer full?
if (!mi_page_is_full(page)) {
if (mi_page_all_free(page)) {
_mi_page_free(page, pq);
}
else {
_mi_page_unfull(page);
}
}
page = next;
}
}
static mi_decl_noinline void mi_heap_generic_collect(mi_heap_t* heap) {
// call potential deferred free routines
_mi_deferred_free(heap, false);
// collect retired pages
_mi_heap_collect_retired(heap, false);
// collect full pages that had concurrent free's
mi_heap_collect_full_pages(heap);
}
/* -----------------------------------------------------------
Initialize the initial free list in a page.
In secure mode we initialize a randomized list by
@ -531,7 +479,7 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) {
MI_UNUSED(stats);
#if (MI_SECURE<=2)
#if (MI_SECURE<3)
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->local_free == NULL);
#endif
@ -589,7 +537,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co
static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats)
{
MI_UNUSED(stats);
#if (MI_SECURE <= 2)
#if (MI_SECURE<3)
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->local_free == NULL);
#endif
@ -617,7 +565,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
----------------------------------------------------------- */
#define MI_MAX_EXTEND_SIZE (4*1024) // heuristic, one OS page seems to work well.
#if (MI_SECURE>0)
#if (MI_SECURE>=3)
#define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many
#else
#define MI_MIN_EXTEND (1)
@ -628,9 +576,9 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
// Note: we also experimented with "bump" allocation on the first
// allocations but this did not speed up any benchmark (due to an
// extra test in malloc? or cache effects?)
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) {
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) {
mi_assert_expensive(mi_page_is_valid_init(page));
#if (MI_SECURE<=2)
#if (MI_SECURE<3)
mi_assert(page->free == NULL);
mi_assert(page->local_free == NULL);
if (page->free != NULL) return;
@ -639,12 +587,12 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
size_t page_size;
//uint8_t* page_start =
_mi_segment_page_start(_mi_page_segment(page), page, &page_size);
mi_stat_counter_increase(tld->stats.pages_extended, 1);
mi_page_area(page, &page_size);
mi_debug_heap_stat_counter_increase(heap, pages_extended, 1);
// calculate the extend count
const size_t bsize = mi_page_block_size(page);
size_t extend = page->reserved - page->capacity;
size_t extend = (size_t)page->reserved - page->capacity;
mi_assert_internal(extend > 0);
size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/bsize);
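// Illustrative worked example (not part of the diff), assuming the clamp
// `if (extend > max_extend) extend = max_extend;` that follows in the source:
// with MI_MAX_EXTEND_SIZE = 4 KiB and bsize = 64 bytes, max_extend = 4096/64 = 64,
// so a fresh page with reserved = 1024 and capacity = 0 grows its free list by
// at most 64 blocks per slow-path call instead of all 1024 at once, bounding
// the work done (and memory touched) on a single allocation.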
@ -660,56 +608,56 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved);
mi_assert_internal(extend < (1UL<<16));
// commit on demand?
if (page->slice_committed > 0) {
const size_t needed_size = (page->capacity + extend)*bsize;
const size_t needed_commit = _mi_align_up( mi_page_slice_offset_of(page, needed_size), MI_PAGE_MIN_COMMIT_SIZE );
if (needed_commit > page->slice_committed) {
mi_assert_internal(((needed_commit - page->slice_committed) % _mi_os_page_size()) == 0);
_mi_os_commit(mi_page_slice_start(page) + page->slice_committed, needed_commit - page->slice_committed, NULL);
page->slice_committed = needed_commit;
}
}
// and append the extended part to the free list
if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) {
mi_page_free_list_extend(page, bsize, extend, &tld->stats );
if (extend < MI_MIN_SLICES || MI_SECURE<3) { //!mi_option_is_enabled(mi_option_secure)) {
mi_page_free_list_extend(page, bsize, extend, &heap->tld->stats );
}
else {
mi_page_free_list_extend_secure(heap, page, bsize, extend, &tld->stats);
mi_page_free_list_extend_secure(heap, page, bsize, extend, &heap->tld->stats);
}
// enable the new free list
page->capacity += (uint16_t)extend;
mi_stat_increase(tld->stats.page_committed, extend * bsize);
mi_debug_heap_stat_increase(heap, page_committed, extend * bsize);
mi_assert_expensive(mi_page_is_valid_init(page));
}
// Initialize a fresh page
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) {
// Initialize a fresh page (that is already partially initialized)
void _mi_page_init(mi_heap_t* heap, mi_page_t* page) {
mi_assert(page != NULL);
mi_segment_t* segment = _mi_page_segment(page);
mi_assert(segment != NULL);
mi_assert_internal(block_size > 0);
// set fields
mi_page_set_heap(page, heap);
page->block_size = block_size;
size_t page_size;
page->page_start = _mi_segment_page_start(segment, page, &page_size);
mi_track_mem_noaccess(page->page_start,page_size);
mi_assert_internal(page_size / block_size < (1L<<16));
page->reserved = (uint16_t)(page_size / block_size);
uint8_t* page_start = mi_page_area(page, &page_size); MI_UNUSED(page_start);
mi_track_mem_noaccess(page_start,page_size);
mi_assert_internal(page_size / mi_page_block_size(page) < (1L<<16));
mi_assert_internal(page->reserved > 0);
#if (MI_PADDING || MI_ENCODE_FREELIST)
page->keys[0] = _mi_heap_random_next(heap);
page->keys[1] = _mi_heap_random_next(heap);
#endif
page->free_is_zero = page->is_zero_init;
#if MI_DEBUG>2
if (page->is_zero_init) {
mi_track_mem_defined(page->page_start, page_size);
mi_assert_expensive(mi_mem_is_zero(page->page_start, page_size));
if (page->memid.initially_zero) {
mi_track_mem_defined(page->page_start, mi_page_committed(page));
mi_assert_expensive(mi_mem_is_zero(page_start, mi_page_committed(page)));
}
#endif
if (block_size > 0 && _mi_is_power_of_two(block_size)) {
page->block_size_shift = (uint8_t)(mi_ctz((uintptr_t)block_size));
}
else {
page->block_size_shift = 0;
}
mi_assert_internal(page->capacity == 0);
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->used == 0);
mi_assert_internal(page->xthread_free == 0);
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(page->xthread_free == 1);
mi_assert_internal(page->next == NULL);
mi_assert_internal(page->prev == NULL);
mi_assert_internal(page->retire_expire == 0);
@ -718,11 +666,11 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
mi_assert_internal(page->keys[0] != 0);
mi_assert_internal(page->keys[1] != 0);
#endif
mi_assert_internal(page->block_size_shift == 0 || (block_size == ((size_t)1 << page->block_size_shift)));
mi_assert_internal(page->block_size_shift == 0 || (mi_page_block_size(page) == ((size_t)1 << page->block_size_shift)));
mi_assert_expensive(mi_page_is_valid_init(page));
// initialize an initial free list
mi_page_extend_free(heap,page,tld);
mi_page_extend_free(heap,page);
mi_assert(mi_page_immediate_available(page));
}
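// Call-sequence sketch (editor illustration, grounded only in the asserts and
// field reads above): by the time `_mi_page_init` runs, the caller -- presumably
// the arena page allocation path -- has already set the block size, `reserved`,
// the memid, and taken ownership of the page; this function then attaches the
// heap, seeds the free-list keys, sets `free_is_zero`, and builds the initial
// free list via `mi_page_extend_free`, e.g. roughly:
//
//   /* caller: allocate page meta-data, set block size, reserved, memid, own it */
//   _mi_page_init(heap, page);                       // attach heap + initial free list
//   mi_assert(mi_page_immediate_available(page));    // as asserted above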
@ -731,40 +679,29 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
Find pages with free blocks
-------------------------------------------------------------*/
// search for a best next page to use for at most N pages (often cut short if immediate blocks are available)
#define MI_MAX_CANDIDATE_SEARCH (4)
// is the page not yet used up to its reserved space?
static bool mi_page_is_expandable(const mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_internal(page->capacity <= page->reserved);
return (page->capacity < page->reserved);
}
// Find a page with free blocks of `page->block_size`.
static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
static mi_decl_noinline mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
{
// search through the pages in "next fit" order
#if MI_STAT
size_t count = 0;
#endif
size_t candidate_count = 0; // we reset this on the first candidate to limit the search
long candidate_limit = 0; // we reset this on the first candidate to limit the search
long full_page_retain = heap->full_page_retain;
mi_page_t* page_candidate = NULL; // a page with free space
mi_page_t* page = pq->first;
while (page != NULL)
{
mi_page_t* next = page->next; // remember next
mi_page_t* next = page->next; // remember next (as this page can move to another queue)
#if MI_STAT
count++;
#endif
candidate_count++;
candidate_limit--;
// collect freed blocks by us and other threads
_mi_page_free_collect(page, false);
#if MI_MAX_CANDIDATE_SEARCH > 1
// search up to N pages for a best candidate
// is the local free list non-empty?
@ -773,28 +710,36 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
// if the page is completely full, move it to the `mi_pages_full`
// queue so we don't visit long-lived pages too often.
if (!immediate_available && !mi_page_is_expandable(page)) {
mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page));
mi_page_to_full(page, pq);
full_page_retain--;
if (full_page_retain < 0) {
mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page));
mi_page_to_full(page, pq);
}
}
else {
// the page has free space, make it a candidate
// we prefer non-expandable pages with high usage as candidates (to reduce commit, and increase chances of free-ing up pages)
if (page_candidate == NULL) {
page_candidate = page;
candidate_count = 0;
candidate_limit = _mi_option_get_fast(mi_option_page_max_candidates);
}
else if (mi_page_all_free(page_candidate)) {
_mi_page_free(page_candidate, pq);
page_candidate = page;
}
// prefer to reuse fuller pages (in the hope the less used page gets freed)
else if (page->used >= page_candidate->used && !mi_page_is_mostly_used(page) && !mi_page_is_expandable(page)) {
else if (page->used >= page_candidate->used && !mi_page_is_mostly_used(page)) { // && !mi_page_is_expandable(page)) {
page_candidate = page;
}
// if we find a non-expandable candidate, or searched for N pages, return with the best candidate
if (immediate_available || candidate_count > MI_MAX_CANDIDATE_SEARCH) {
if (immediate_available || candidate_limit <= 0) {
mi_assert_internal(page_candidate!=NULL);
break;
}
}
#else
// first-fit algorithm
#if 0
// first-fit algorithm without candidates
// If the page contains free blocks, we are done
if (mi_page_immediate_available(page) || mi_page_is_expandable(page)) {
break; // pick this one
@ -809,26 +754,32 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
page = next;
} // for each page
mi_heap_stat_counter_increase(heap, searches, count);
mi_debug_heap_stat_counter_increase(heap, searches, count);
// set the page to the best candidate
if (page_candidate != NULL) {
page = page_candidate;
}
if (page != NULL && !mi_page_immediate_available(page)) {
mi_assert_internal(mi_page_is_expandable(page));
mi_page_extend_free(heap, page, heap->tld);
if (page != NULL) {
if (!mi_page_immediate_available(page)) {
mi_assert_internal(mi_page_is_expandable(page));
mi_page_extend_free(heap, page);
}
mi_assert_internal(mi_page_immediate_available(page));
}
if (page == NULL) {
_mi_heap_collect_retired(heap, false); // perhaps make a page available
page = mi_page_fresh(heap, pq);
mi_assert_internal(page == NULL || mi_page_immediate_available(page));
if (page == NULL && first_try) {
// out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again
page = mi_page_queue_find_free_ex(heap, pq, false);
mi_assert_internal(page == NULL || mi_page_immediate_available(page));
}
}
else {
mi_assert_internal(page == NULL || mi_page_immediate_available(page));
// move the page to the front of the queue
mi_page_queue_move_to_front(heap, pq, page);
page->retire_expire = 0;
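// Illustrative walk-through (not part of the diff) of the candidate policy
// above, assuming mi_option_page_max_candidates = 4 and a queue holding pages
// A (used 10/32), B (used 20/32), C (completely free):
//   - A becomes the first candidate and resets candidate_limit to 4;
//   - B replaces A since it is at least as used and not "mostly used",
//     concentrating allocations so the emptier page A has a chance to drain;
//   - had the current candidate turned out to be completely free (like C) when
//     a better page shows up, it is freed on the spot via _mi_page_free;
//   - the scan stops early once a page has blocks immediately available or
//     candidate_limit reaches zero.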
@ -843,15 +794,16 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
// Find a page with free blocks of `size`.
static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
mi_page_queue_t* pq = mi_page_queue(heap, size);
static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, mi_page_queue_t* pq) {
// mi_page_queue_t* pq = mi_page_queue(heap, size);
mi_assert_internal(!mi_page_queue_is_huge(pq));
// check the first page: we even do this with candidate search or otherwise we re-search every time
mi_page_t* page = pq->first;
if (page != NULL) {
#if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness
if (page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) {
mi_page_extend_free(heap, page, heap->tld);
mi_page_extend_free(heap, page);
mi_assert_internal(mi_page_immediate_available(page));
}
else
@ -902,13 +854,13 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex
// Huge pages contain just one block, and the segment contains just that page.
// Huge pages are also used if the requested alignment is very large (> MI_BLOCK_ALIGNMENT_MAX)
// so their size is not always `> MI_LARGE_OBJ_SIZE_MAX`.
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) {
size_t block_size = _mi_os_good_alloc_size(size);
mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0);
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment, mi_page_queue_t* pq) {
const size_t block_size = _mi_os_good_alloc_size(size);
// mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0);
#if MI_HUGE_PAGE_ABANDON
mi_page_queue_t* pq = NULL;
#error todo.
#else
mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_OBJ_SIZE_MAX+1); // always in the huge queue regardless of the block size
// mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_MAX_OBJ_SIZE+1); // always in the huge queue regardless of the block size
mi_assert_internal(mi_page_queue_is_huge(pq));
#endif
mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment);
@ -916,10 +868,9 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
mi_assert_internal(mi_page_block_size(page) >= size);
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(mi_page_is_huge(page));
mi_assert_internal(_mi_page_segment(page)->page_kind == MI_PAGE_HUGE);
mi_assert_internal(_mi_page_segment(page)->used==1);
mi_assert_internal(mi_page_is_singleton(page));
#if MI_HUGE_PAGE_ABANDON
mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
mi_assert_internal(mi_page_is_abandoned(page));
mi_page_set_heap(page, NULL);
#endif
mi_heap_stat_increase(heap, huge, mi_page_block_size(page));
@ -932,30 +883,30 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
// Allocate a page
// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept {
// huge allocation?
const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size`
if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) {
if mi_unlikely(req_size > MI_MAX_ALLOC_SIZE) {
_mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
return NULL;
}
else {
return mi_huge_page_alloc(heap,size,huge_alignment);
}
if mi_unlikely(req_size > MI_MAX_ALLOC_SIZE) {
_mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
return NULL;
}
mi_page_queue_t* pq = mi_page_queue(heap, (huge_alignment > 0 ? MI_LARGE_MAX_OBJ_SIZE+1 : size));
// huge allocation?
if mi_unlikely(mi_page_queue_is_huge(pq) || req_size > MI_MAX_ALLOC_SIZE) {
return mi_huge_page_alloc(heap,size,huge_alignment,pq);
}
else {
// otherwise find a page with free blocks in our size segregated queues
#if MI_PADDING
mi_assert_internal(size >= MI_PADDING_SIZE);
#endif
return mi_find_free_page(heap, size);
return mi_find_free_page(heap, pq);
}
}
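// Worked example (illustrative, not part of the diff) of the new dispatch: the
// page queue is selected first, from the size or -- when huge_alignment > 0 --
// from MI_LARGE_MAX_OBJ_SIZE+1, and the huge path is taken whenever that queue
// is the huge queue. A small request (say 64 bytes) therefore goes through
// mi_find_free_page on its size-segregated queue, while an over-large request
// or any request with a huge alignment lands in the huge queue and is served
// by mi_huge_page_alloc as a singleton page.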
// Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed.
// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
// The `huge_alignment` is normally 0 but is set to a multiple of MI_SEGMENT_SIZE for
// very large requested alignments in which case we use a huge segment.
// The `huge_alignment` is normally 0 but is set to a multiple of MI_SLICE_SIZE for
// very large requested alignments in which case we use a huge singleton page.
void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept
{
mi_assert_internal(heap != NULL);
@ -967,15 +918,16 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
}
mi_assert_internal(mi_heap_is_initialized(heap));
// call potential deferred free routines
_mi_deferred_free(heap, false);
// free delayed frees from other threads (but skip contended ones)
_mi_heap_delayed_free_partial(heap);
// collect every N generic mallocs
if mi_unlikely(heap->generic_count++ > 10000) {
heap->generic_count = 0;
mi_heap_generic_collect(heap);
}
// find (or allocate) a page of the right size
mi_page_t* page = mi_find_page(heap, size, huge_alignment);
if mi_unlikely(page == NULL) { // first time out of memory, try to collect and retry the allocation once more
mi_heap_generic_collect(heap);
mi_heap_collect(heap, true /* force */);
page = mi_find_page(heap, size, huge_alignment);
}
@ -988,6 +940,8 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(mi_page_block_size(page) >= size);
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
// and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc)
if mi_unlikely(zero && mi_page_is_huge(page)) {


@ -239,6 +239,9 @@ void _mi_prim_thread_done_auto_done(void) {
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
MI_UNUSED(heap);
}
#endif
bool _mi_prim_thread_is_in_threadpool(void) {
return false;
}


@ -62,6 +62,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include <sys/syscall.h>
#endif
#define MI_UNIX_LARGE_PAGE_SIZE (2*MI_MiB) // TODO: can we query the OS for this?
//------------------------------------------------------------------------------------
// Use syscalls for some primitives to allow for libraries that override open/read/close etc.
@ -147,7 +148,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
}
#endif
}
config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this?
config->large_page_size = MI_UNIX_LARGE_PAGE_SIZE;
config->has_overcommit = unix_detect_overcommit();
config->has_partial_free = true; // mmap can free in parts
config->has_virtual_reserve = true; // todo: check if this true for NetBSD? (for anonymous mmap with PROT_NONE)
@ -362,6 +363,9 @@ int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool comm
mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
mi_assert_internal(commit || !allow_large);
mi_assert_internal(try_alignment > 0);
if (hint_addr == NULL && size >= 8*MI_UNIX_LARGE_PAGE_SIZE && try_alignment > 1 && _mi_is_power_of_two(try_alignment) && try_alignment < MI_UNIX_LARGE_PAGE_SIZE) {
try_alignment = MI_UNIX_LARGE_PAGE_SIZE; // try to align along large page size for larger allocations
}
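// Illustrative numbers (not part of the diff): with MI_UNIX_LARGE_PAGE_SIZE of
// 2 MiB the hint applies to mappings of at least 8*2 MiB = 16 MiB whose
// requested alignment is a power of two below 2 MiB; bumping the alignment to
// 2 MiB presumably makes it more likely the kernel can back the mapping with
// transparent huge pages.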
*is_zero = true;
int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
@ -409,7 +413,7 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) {
int err = 0;
// decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE)
err = unix_madvise(start, size, MADV_DONTNEED);
#if !MI_DEBUG && !MI_SECURE
#if !MI_DEBUG && MI_SECURE<=2
*needs_recommit = false;
#else
*needs_recommit = true;
@ -479,7 +483,7 @@ static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, co
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
bool is_large = true;
*is_zero = true;
*addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
*addr = unix_mmap(hint_addr, size, MI_ARENA_SLICE_ALIGN, PROT_READ | PROT_WRITE, true, true, &is_large);
if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
unsigned long numa_mask = (1UL << numa_node);
// TODO: does `mbind` work correctly for huge OS pages? should we
@ -886,3 +890,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
}
#endif
bool _mi_prim_thread_is_in_threadpool(void) {
return false;
}


@ -277,3 +277,7 @@ void _mi_prim_thread_done_auto_done(void) {
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
MI_UNUSED(heap);
}
bool _mi_prim_thread_is_in_threadpool(void) {
return false;
}


@ -17,6 +17,14 @@ terms of the MIT license. A copy of the license can be found in the file
// Dynamically bind Windows API points for portability
//---------------------------------------------
#if defined(_MSC_VER)
#pragma warning(disable:28159) // don't use GetVersion
#pragma warning(disable:4996) // don't use GetVersion
#endif
static DWORD win_major_version = 6;
static DWORD win_minor_version = 0;
// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016.
// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility)
// NtAllocateVirtualMemoryEx is used for huge OS page allocation (1GiB)
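// A minimal sketch (editor illustration, not from the diff) of the dynamic
// binding the comment above describes: resolve VirtualAlloc2 at runtime so the
// library still loads on Windows versions that predate it. The `_sketch` names
// and the use of a plain void* for the extended parameters are assumptions kept
// deliberately loose; the real code declares the fully typed signature.
#if 0   // illustration only
#include <windows.h>
typedef void* (__stdcall *PVirtualAlloc2_sketch)(
  HANDLE process, void* base, SIZE_T size,
  ULONG alloc_type, ULONG page_protection,
  void* extended_params, ULONG param_count);

static PVirtualAlloc2_sketch pVirtualAlloc2_sketch = NULL;

static void bind_virtual_alloc2_sketch(void) {
  // VirtualAlloc2 is exported from kernelbase.dll on Windows 10 / Server 2016 and later
  HMODULE h = GetModuleHandleA("kernelbase.dll");
  if (h != NULL) {
    pVirtualAlloc2_sketch = (PVirtualAlloc2_sketch)GetProcAddress(h, "VirtualAlloc2");
  }
  // if it stays NULL, callers fall back to plain VirtualAlloc without the
  // alignment extended parameter.
}
#endif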
@ -108,16 +116,25 @@ static bool win_enable_large_os_pages(size_t* large_page_size)
// Initialize
//---------------------------------------------
static DWORD win_allocation_granularity = 64*MI_KiB;
void _mi_prim_mem_init( mi_os_mem_config_t* config )
{
config->has_overcommit = false;
config->has_partial_free = false;
config->has_virtual_reserve = true;
// windows version
const DWORD win_version = GetVersion();
win_major_version = (DWORD)(LOBYTE(LOWORD(win_version)));
win_minor_version = (DWORD)(HIBYTE(LOWORD(win_version)));
// get the page size
SYSTEM_INFO si;
GetSystemInfo(&si);
if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; }
if (si.dwAllocationGranularity > 0) { config->alloc_granularity = si.dwAllocationGranularity; }
if (si.dwAllocationGranularity > 0) {
config->alloc_granularity = si.dwAllocationGranularity;
win_allocation_granularity = si.dwAllocationGranularity;
}
// get virtual address bits
if ((uintptr_t)si.lpMaximumApplicationAddress > 0) {
const size_t vbits = MI_INTPTR_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress);
@ -127,7 +144,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
ULONGLONG memInKiB = 0;
if (GetPhysicallyInstalledSystemMemory(&memInKiB)) {
if (memInKiB > 0 && memInKiB < (SIZE_MAX / MI_KiB)) {
config->physical_memory = memInKiB * MI_KiB;
config->physical_memory = (size_t)(memInKiB * MI_KiB);
}
}
// get the VirtualAlloc2 function
@ -175,7 +192,7 @@ int _mi_prim_free(void* addr, size_t size ) {
// the start of the region.
MEMORY_BASIC_INFORMATION info = { 0 };
VirtualQuery(addr, &info, sizeof(info));
if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) {
if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)(4*MI_MiB)) {
errcode = 0;
err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0);
if (err) { errcode = GetLastError(); }
@ -203,7 +220,7 @@ static void* win_virtual_alloc_prim_once(void* addr, size_t size, size_t try_ali
}
#endif
// on modern Windows try use VirtualAlloc2 for aligned allocation
if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
if (addr == NULL && try_alignment > win_allocation_granularity && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 };
reqs.Alignment = try_alignment;
MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} };
@ -239,7 +256,7 @@ static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignmen
// success, return the address
return p;
}
else if (max_retry_msecs > 0 && (try_alignment <= 2*MI_SEGMENT_ALIGN) &&
else if (max_retry_msecs > 0 && (try_alignment <= 8*MI_MiB) &&
(flags&MEM_COMMIT) != 0 && (flags&MEM_LARGE_PAGES) == 0 &&
win_is_out_of_memory_error(GetLastError())) {
// if committing regular memory and being out-of-memory,
@ -815,3 +832,16 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
mi_allocator_done();
}
#endif
bool _mi_prim_thread_is_in_threadpool(void) {
#if (MI_ARCH_X64 || MI_ARCH_X86)
if (win_major_version >= 6) {
// check if this thread belongs to a windows threadpool
// see: <https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/api/pebteb/teb/index.htm>
_TEB* const teb = NtCurrentTeb();
void* const pool_data = *((void**)((uint8_t*)teb + (MI_SIZE_BITS == 32 ? 0x0F90 : 0x1778)));
return (pool_data != NULL);
}
#endif
return false;
}
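// Hedged usage note (not from the diff): how mimalloc consumes this result is
// not shown here, but a caller would presumably branch on it to treat
// thread-pool threads -- which are long-lived and recycled -- more
// conservatively, e.g.:
//   if (_mi_prim_thread_is_in_threadpool()) { /* adjust per-thread heap
//     teardown or reclaim behavior for pool threads */ }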


@ -7,7 +7,6 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/prim.h" // _mi_prim_random_buf
#include <string.h> // memset
/* ----------------------------------------------------------------------------
We use our own PRNG to keep predictable performance of random number generation
@ -33,15 +32,11 @@ The implementation uses regular C code which compiles very well on modern compil
(gcc x64 has no register spills, and clang 6+ uses SSE instructions)
-----------------------------------------------------------------------------*/
static inline uint32_t rotl(uint32_t x, uint32_t shift) {
return (x << shift) | (x >> (32 - shift));
}
static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) {
x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16);
x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12);
x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8);
x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7);
x[a] += x[b]; x[d] = mi_rotl32(x[d] ^ x[a], 16);
x[c] += x[d]; x[b] = mi_rotl32(x[b] ^ x[c], 12);
x[a] += x[b]; x[d] = mi_rotl32(x[d] ^ x[a], 8);
x[c] += x[d]; x[b] = mi_rotl32(x[b] ^ x[c], 7);
}
static void chacha_block(mi_random_ctx_t* ctx)
@ -99,7 +94,7 @@ static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t no
// since we only use chacha for randomness (and not encryption) we
// do not _need_ to read 32-bit values as little endian but we do anyways
// just for being compatible :-)
memset(ctx, 0, sizeof(*ctx));
_mi_memzero(ctx, sizeof(*ctx));
for (size_t i = 0; i < 4; i++) {
const uint8_t* sigma = (uint8_t*)"expand 32-byte k";
ctx->input[i] = read32(sigma,i);
@ -114,7 +109,7 @@ static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t no
}
static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) {
memset(ctx_new, 0, sizeof(*ctx_new));
_mi_memzero(ctx_new, sizeof(*ctx_new));
_mi_memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input));
ctx_new->input[12] = 0;
ctx_new->input[13] = 0;
@ -160,7 +155,7 @@ If we cannot get good randomness, we fall back to weak randomness based on a tim
uintptr_t _mi_os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random
x ^= _mi_prim_clock_now();
x ^= _mi_prim_clock_now();
// and do a few randomization steps
uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
for (uintptr_t i = 0; i < max; i++) {


@ -1,136 +0,0 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* -----------------------------------------------------------
The following functions are to reliably find the segment or
block that encompasses any pointer p (or NULL if it is not
in any of our segments).
We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB)
set to 1 if it contains the segment meta data.
----------------------------------------------------------- */
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
// Reduce total address space to reduce .bss (due to the `mi_segment_map`)
#if (MI_INTPTR_SIZE > 4) && MI_TRACK_ASAN
#define MI_SEGMENT_MAP_MAX_ADDRESS (128*1024ULL*MI_GiB) // 128 TiB (see issue #881)
#elif (MI_INTPTR_SIZE > 4)
#define MI_SEGMENT_MAP_MAX_ADDRESS (48*1024ULL*MI_GiB) // 48 TiB
#else
#define MI_SEGMENT_MAP_MAX_ADDRESS (UINT32_MAX)
#endif
#define MI_SEGMENT_MAP_PART_SIZE (MI_INTPTR_SIZE*MI_KiB - 128) // 128 > sizeof(mi_memid_t) !
#define MI_SEGMENT_MAP_PART_BITS (8*MI_SEGMENT_MAP_PART_SIZE)
#define MI_SEGMENT_MAP_PART_ENTRIES (MI_SEGMENT_MAP_PART_SIZE / MI_INTPTR_SIZE)
#define MI_SEGMENT_MAP_PART_BIT_SPAN (MI_SEGMENT_ALIGN)
#define MI_SEGMENT_MAP_PART_SPAN (MI_SEGMENT_MAP_PART_BITS * MI_SEGMENT_MAP_PART_BIT_SPAN)
#define MI_SEGMENT_MAP_MAX_PARTS ((MI_SEGMENT_MAP_MAX_ADDRESS / MI_SEGMENT_MAP_PART_SPAN) + 1)
// A part of the segment map.
typedef struct mi_segmap_part_s {
mi_memid_t memid;
_Atomic(uintptr_t) map[MI_SEGMENT_MAP_PART_ENTRIES];
} mi_segmap_part_t;
// Allocate parts on-demand to reduce .bss footprint
static _Atomic(mi_segmap_part_t*) mi_segment_map[MI_SEGMENT_MAP_MAX_PARTS]; // = { NULL, .. }
static mi_segmap_part_t* mi_segment_map_index_of(const mi_segment_t* segment, bool create_on_demand, size_t* idx, size_t* bitidx) {
// note: segment can be invalid or NULL.
mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE?
*idx = 0;
*bitidx = 0;
if ((uintptr_t)segment >= MI_SEGMENT_MAP_MAX_ADDRESS) return NULL;
const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_MAP_PART_SPAN;
if (segindex >= MI_SEGMENT_MAP_MAX_PARTS) return NULL;
mi_segmap_part_t* part = mi_atomic_load_ptr_relaxed(mi_segmap_part_t, &mi_segment_map[segindex]);
// allocate on demand to reduce .bss footprint
if (part == NULL) {
if (!create_on_demand) return NULL;
mi_memid_t memid;
part = (mi_segmap_part_t*)_mi_os_alloc(sizeof(mi_segmap_part_t), &memid);
if (part == NULL) return NULL;
part->memid = memid;
mi_segmap_part_t* expected = NULL;
if (!mi_atomic_cas_ptr_strong_release(mi_segmap_part_t, &mi_segment_map[segindex], &expected, part)) {
_mi_os_free(part, sizeof(mi_segmap_part_t), memid);
part = expected;
if (part == NULL) return NULL;
}
}
mi_assert(part != NULL);
const uintptr_t offset = ((uintptr_t)segment) % MI_SEGMENT_MAP_PART_SPAN;
const uintptr_t bitofs = offset / MI_SEGMENT_MAP_PART_BIT_SPAN;
*idx = bitofs / MI_INTPTR_BITS;
*bitidx = bitofs % MI_INTPTR_BITS;
return part;
}
void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
if (segment->memid.memkind == MI_MEM_ARENA) return; // we lookup segments first in the arena's and don't need the segment map
size_t index;
size_t bitidx;
mi_segmap_part_t* part = mi_segment_map_index_of(segment, true /* alloc map if needed */, &index, &bitidx);
if (part == NULL) return; // outside our address range..
uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
uintptr_t newmask;
do {
newmask = (mask | ((uintptr_t)1 << bitidx));
} while (!mi_atomic_cas_weak_release(&part->map[index], &mask, newmask));
}
void _mi_segment_map_freed_at(const mi_segment_t* segment) {
if (segment->memid.memkind == MI_MEM_ARENA) return;
size_t index;
size_t bitidx;
mi_segmap_part_t* part = mi_segment_map_index_of(segment, false /* don't alloc if not present */, &index, &bitidx);
if (part == NULL) return; // outside our address range..
uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
uintptr_t newmask;
do {
newmask = (mask & ~((uintptr_t)1 << bitidx));
} while (!mi_atomic_cas_weak_release(&part->map[index], &mask, newmask));
}
// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
static mi_segment_t* _mi_segment_of(const void* p) {
if (p == NULL) return NULL;
mi_segment_t* segment = _mi_ptr_segment(p); // segment can be NULL
size_t index;
size_t bitidx;
mi_segmap_part_t* part = mi_segment_map_index_of(segment, false /* dont alloc if not present */, &index, &bitidx);
if (part == NULL) return NULL;
const uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) {
bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(cookie_ok); MI_UNUSED(cookie_ok);
return segment; // yes, allocated by us
}
return NULL;
}
// Is this a valid pointer in our heap?
static bool mi_is_valid_pointer(const void* p) {
// first check if it is in an arena, then check if it is OS allocated
return (_mi_arena_contains(p) || _mi_segment_of(p) != NULL);
}
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
return mi_is_valid_pointer(p);
}
void _mi_segment_map_unsafe_destroy(void) {
for (size_t i = 0; i < MI_SEGMENT_MAP_MAX_PARTS; i++) {
mi_segmap_part_t* part = mi_atomic_exchange_ptr_relaxed(mi_segmap_part_t, &mi_segment_map[i], NULL);
if (part != NULL) {
_mi_os_free(part, sizeof(mi_segmap_part_t), part->memid);
}
}
}

File diff suppressed because it is too large


@ -20,10 +20,11 @@ terms of the MIT license. A copy of the license can be found in the file
// containing the whole library. If it is linked first
// it will override all the standard library allocation
// functions (on Unix's).
#include "alloc.c" // includes alloc-override.c
#include "alloc.c" // includes alloc-override.c and free.c
#include "alloc-aligned.c"
#include "alloc-posix.c"
#include "arena.c"
#include "arena-meta.c"
#include "bitmap.c"
#include "heap.c"
#include "init.c"
@ -31,9 +32,8 @@ terms of the MIT license. A copy of the license can be found in the file
#include "options.c"
#include "os.c"
#include "page.c" // includes page-queue.c
#include "page-map.c"
#include "random.c"
#include "segment.c"
#include "segment-map.c"
#include "stats.c"
#include "prim/prim.c"
#if MI_OSX_ZONE


@ -19,85 +19,92 @@ terms of the MIT license. A copy of the license can be found in the file
Statistics operations
----------------------------------------------------------- */
static bool mi_is_in_main(void* stat) {
return ((uint8_t*)stat >= (uint8_t*)&_mi_stats_main
&& (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t)));
static void mi_stat_update_mt(mi_stat_count_t* stat, int64_t amount) {
if (amount == 0) return;
// add atomically
int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
mi_atomic_maxi64_relaxed(&stat->peak, current + amount);
if (amount > 0) {
mi_atomic_addi64_relaxed(&stat->allocated, amount);
}
else {
mi_atomic_addi64_relaxed(&stat->freed, -amount);
}
}
static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
if (amount == 0) return;
if mi_unlikely(mi_is_in_main(stat))
{
// add atomically (for abandoned pages)
int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
mi_atomic_maxi64_relaxed(&stat->peak, current + amount);
if (amount > 0) {
mi_atomic_addi64_relaxed(&stat->allocated,amount);
}
else {
mi_atomic_addi64_relaxed(&stat->freed, -amount);
}
// add thread local
stat->current += amount;
if (stat->current > stat->peak) stat->peak = stat->current;
if (amount > 0) {
stat->allocated += amount;
}
else {
// add thread local
stat->current += amount;
if (stat->current > stat->peak) stat->peak = stat->current;
if (amount > 0) {
stat->allocated += amount;
}
else {
stat->freed += -amount;
}
stat->freed += -amount;
}
}
// Adjust stats to compensate; for example before committing a range,
// first adjust downwards with parts that were already committed so
// first adjust downwards with parts that were already committed so
// we avoid double counting.
static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount) {
static void mi_stat_adjust_mt(mi_stat_count_t* stat, int64_t amount, bool on_alloc) {
if (amount == 0) return;
if mi_unlikely(mi_is_in_main(stat))
{
// adjust atomically
mi_atomic_addi64_relaxed(&stat->current, amount);
mi_atomic_addi64_relaxed(&stat->allocated, amount);
mi_atomic_addi64_relaxed(&stat->freed, amount);
}
else {
// don't affect the peak
stat->current += amount;
// add to both
// adjust atomically
mi_atomic_addi64_relaxed(&stat->current, amount);
mi_atomic_addi64_relaxed((on_alloc ? &stat->allocated : &stat->freed), amount);
}
static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount, bool on_alloc) {
if (amount == 0) return;
stat->current += amount;
if (on_alloc) {
stat->allocated += amount;
stat->freed += amount;
}
}
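// Worked example (illustrative, not part of the diff) of the compensation
// described above: when committing a 4 MiB range of which 1 MiB was already
// committed, the caller first adjusts the committed statistic down by the
// 1 MiB overlap and then records the full 4 MiB commit; `current` ends up
// +3 MiB, and `peak` reflects only the real increase since the adjust path
// above deliberately skips the peak update.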
void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) {
if (mi_is_in_main(stat)) {
mi_atomic_addi64_relaxed( &stat->count, 1 );
mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount );
}
else {
stat->count++;
stat->total += amount;
stat->freed += amount;
}
}
void _mi_stat_increase(mi_stat_count_t* stat, size_t amount) {
void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount) {
mi_atomic_addi64_relaxed(&stat->count, 1);
mi_atomic_addi64_relaxed(&stat->total, (int64_t)amount);
}
void __mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) {
stat->count++;
stat->total += amount;
}
void __mi_stat_increase_mt(mi_stat_count_t* stat, size_t amount) {
mi_stat_update_mt(stat, (int64_t)amount);
}
void __mi_stat_increase(mi_stat_count_t* stat, size_t amount) {
mi_stat_update(stat, (int64_t)amount);
}
void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
void __mi_stat_decrease_mt(mi_stat_count_t* stat, size_t amount) {
mi_stat_update_mt(stat, -((int64_t)amount));
}
void __mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
mi_stat_update(stat, -((int64_t)amount));
}
void _mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount) {
mi_stat_adjust(stat, (int64_t)amount);
void __mi_stat_adjust_increase_mt(mi_stat_count_t* stat, size_t amount, bool on_alloc) {
mi_stat_adjust_mt(stat, (int64_t)amount, on_alloc);
}
void __mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount, bool on_alloc) {
mi_stat_adjust(stat, (int64_t)amount, on_alloc);
}
void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount) {
mi_stat_adjust(stat, -((int64_t)amount));
void __mi_stat_adjust_decrease_mt(mi_stat_count_t* stat, size_t amount, bool on_alloc) {
mi_stat_adjust_mt(stat, -((int64_t)amount), on_alloc);
}
void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount, bool on_alloc) {
mi_stat_adjust(stat, -((int64_t)amount), on_alloc);
}
// must be thread safe as it is called from stats_merge
static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) {
@ -119,7 +126,6 @@ static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t
// must be thread safe as it is called from stats_merge
static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
if (stats==src) return;
mi_stat_add(&stats->segments, &src->segments,1);
mi_stat_add(&stats->pages, &src->pages,1);
mi_stat_add(&stats->reserved, &src->reserved, 1);
mi_stat_add(&stats->committed, &src->committed, 1);
@ -128,11 +134,9 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
mi_stat_add(&stats->page_committed, &src->page_committed, 1);
mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1);
mi_stat_add(&stats->segments_abandoned, &src->segments_abandoned, 1);
mi_stat_add(&stats->threads, &src->threads, 1);
mi_stat_add(&stats->malloc, &src->malloc, 1);
mi_stat_add(&stats->segments_cache, &src->segments_cache, 1);
mi_stat_add(&stats->normal, &src->normal, 1);
mi_stat_add(&stats->huge, &src->huge, 1);
mi_stat_add(&stats->giant, &src->giant, 1);
@ -146,7 +150,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1);
mi_stat_counter_add(&stats->searches, &src->searches, 1);
mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1);
mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1);
mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1);
mi_stat_counter_add(&stats->guarded_alloc_count, &src->guarded_alloc_count, 1);
#if MI_STAT>1
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
@ -165,7 +169,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
// unit == 0: count as decimal
// unit < 0 : count in binary
static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) {
char buf[32]; buf[0] = 0;
char buf[32]; _mi_memzero_var(buf);
int len = 32;
const char* suffix = (unit <= 0 ? " " : "B");
const int64_t base = (unit == 0 ? 1000 : 1024);
@ -330,7 +334,7 @@ static void mi_cdecl mi_buffered_out(const char* msg, void* arg) {
static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept {
// wrap the output function to be line buffered
char buf[256];
char buf[256]; _mi_memzero_var(buf);
buffered_t buffer = { out0, arg0, NULL, 0, 255 };
buffer.buf = buf;
mi_output_fun* out = &mi_buffered_out;
@ -343,7 +347,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
#endif
#if MI_STAT
mi_stat_print(&stats->normal, "normal", (stats->normal_count.count == 0 ? 1 : -(stats->normal.allocated / stats->normal_count.count)), out, arg);
mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg);
mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg);
mi_stat_count_t total = { 0,0,0,0 };
mi_stat_add(&total, &stats->normal, 1);
mi_stat_add(&total, &stats->huge, 1);
@ -357,21 +361,24 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
mi_stat_print_ex(&stats->committed, "committed", 1, out, arg, "");
mi_stat_peak_print(&stats->reset, "reset", 1, out, arg );
mi_stat_peak_print(&stats->purged, "purged", 1, out, arg );
mi_stat_print(&stats->page_committed, "touched", 1, out, arg);
mi_stat_print(&stats->segments, "segments", -1, out, arg);
mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg);
mi_stat_print(&stats->segments_cache, "-cached", -1, out, arg);
mi_stat_print(&stats->pages, "pages", -1, out, arg);
//mi_stat_print(&stats->segments, "segments", -1, out, arg);
//mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg);
//mi_stat_print(&stats->segments_cache, "-cached", -1, out, arg);
mi_stat_print_ex(&stats->page_committed, "touched", 1, out, arg, "");
mi_stat_print_ex(&stats->pages, "pages", -1, out, arg, "");
mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out, arg);
mi_stat_counter_print(&stats->pages_reclaim_on_alloc, "-reclaima", out, arg);
mi_stat_counter_print(&stats->pages_reclaim_on_free, "-reclaimf", out, arg);
mi_stat_counter_print(&stats->pages_reabandon_full, "-reabandon", out, arg);
mi_stat_counter_print(&stats->pages_unabandon_busy_wait, "-waits", out, arg);
mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg);
mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg);
mi_stat_counter_print(&stats->arena_count, "arenas", out, arg);
mi_stat_counter_print(&stats->arena_crossover_count, "-crossover", out, arg);
mi_stat_counter_print(&stats->arena_rollback_count, "-rollback", out, arg);
mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg);
mi_stat_counter_print(&stats->commit_calls, "commits", out, arg);
mi_stat_counter_print(&stats->reset_calls, "resets", out, arg);
mi_stat_counter_print(&stats->purge_calls, "purges", out, arg);
mi_stat_counter_print(&stats->arena_purges, "-purges", out, arg);
mi_stat_counter_print(&stats->mmap_calls, "mmap calls", out, arg);
mi_stat_counter_print(&stats->commit_calls, " -commit", out, arg);
mi_stat_counter_print(&stats->reset_calls, "-reset", out, arg);
mi_stat_counter_print(&stats->purge_calls, "-purge", out, arg);
mi_stat_counter_print(&stats->guarded_alloc_count, "guarded", out, arg);
mi_stat_print(&stats->threads, "threads", -1, out, arg);
mi_stat_counter_print_avg(&stats->searches, "searches", out, arg);
@ -399,36 +406,37 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
static mi_msecs_t mi_process_start; // = 0
static mi_stats_t* mi_stats_get_default(void) {
mi_heap_t* heap = mi_heap_get_default();
return &heap->tld->stats;
}
static void mi_stats_merge_from(mi_stats_t* stats) {
if (stats != &_mi_stats_main) {
mi_stats_add(&_mi_stats_main, stats);
memset(stats, 0, sizeof(mi_stats_t));
}
// return thread local stats
static mi_stats_t* mi_get_tld_stats(void) {
return &mi_heap_get_default()->tld->stats;
}
void mi_stats_reset(void) mi_attr_noexcept {
mi_stats_t* stats = mi_stats_get_default();
if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); }
memset(&_mi_stats_main, 0, sizeof(mi_stats_t));
mi_stats_t* stats = mi_get_tld_stats();
mi_subproc_t* subproc = _mi_subproc();
if (stats != &subproc->stats) { _mi_memzero(stats, sizeof(mi_stats_t)); }
_mi_memzero(&subproc->stats, sizeof(mi_stats_t));
if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); };
}
void mi_stats_merge(void) mi_attr_noexcept {
mi_stats_merge_from( mi_stats_get_default() );
void _mi_stats_merge_from(mi_stats_t* to, mi_stats_t* from) {
if (to != from) {
mi_stats_add(to, from);
_mi_memzero(from, sizeof(mi_stats_t));
}
}
void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done`
mi_stats_merge_from(stats);
_mi_stats_merge_from(&_mi_subproc()->stats, stats);
}
void mi_stats_merge(void) mi_attr_noexcept {
_mi_stats_done( mi_get_tld_stats() );
}
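// Small usage sketch (editor illustration) with the public API, now that stats
// merge into the per-subprocess statistics instead of the former _mi_stats_main:
//
//   mi_stats_reset();       // zeroes both the thread-local and subprocess stats
//   /* ... allocate and free ... */
//   mi_stats_print(NULL);   // merges this thread's stats, then prints the totals
//
// mi_thread_stats_print_out() still prints only the calling thread's
// (unmerged) statistics.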
void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept {
mi_stats_merge_from(mi_stats_get_default());
_mi_stats_print(&_mi_stats_main, out, arg);
mi_stats_merge();
_mi_stats_print(&_mi_subproc()->stats, out, arg);
}
void mi_stats_print(void* out) mi_attr_noexcept {
@ -437,7 +445,7 @@ void mi_stats_print(void* out) mi_attr_noexcept {
}
void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept {
_mi_stats_print(mi_stats_get_default(), out, arg);
_mi_stats_print(mi_get_tld_stats(), out, arg);
}
@ -471,11 +479,12 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) {
mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept
{
mi_subproc_t* subproc = _mi_subproc();
mi_process_info_t pinfo;
_mi_memzero_var(pinfo);
pinfo.elapsed = _mi_clock_end(mi_process_start);
pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));
pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)(&subproc->stats.committed.current)));
pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)(&subproc->stats.committed.peak)));
pinfo.current_rss = pinfo.current_commit;
pinfo.peak_rss = pinfo.peak_commit;
pinfo.utime = 0;
@ -483,7 +492,7 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s
pinfo.page_faults = 0;
_mi_prim_process_info(&pinfo);
if (elapsed_msecs!=NULL) *elapsed_msecs = (pinfo.elapsed < 0 ? 0 : (pinfo.elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.elapsed : PTRDIFF_MAX));
if (user_msecs!=NULL) *user_msecs = (pinfo.utime < 0 ? 0 : (pinfo.utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.utime : PTRDIFF_MAX));
if (system_msecs!=NULL) *system_msecs = (pinfo.stime < 0 ? 0 : (pinfo.stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.stime : PTRDIFF_MAX));


@ -50,7 +50,6 @@ int main() {
// mi_bins();
void* p1 = malloc(78);
void* p2 = malloc(24);
free(p1);
@ -83,7 +82,7 @@ int main() {
static void invalid_free() {
free((void*)0xBADBEEF);
realloc((void*)0xBADBEEF,10);
realloc((void*)0xBADBEEF, 10);
}
static void block_overflow1() {
@ -181,7 +180,7 @@ static void test_process_info(void) {
size_t peak_commit = 0;
size_t page_faults = 0;
for (int i = 0; i < 100000; i++) {
void* p = calloc(100,10);
void* p = calloc(100, 10);
free(p);
}
mi_process_info(&elapsed, &user_msecs, &system_msecs, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults);
@ -239,8 +238,8 @@ static void test_heap_walk(void) {
}
static void test_canary_leak(void) {
char* p = mi_mallocn_tp(char,23);
for(int i = 0; i < 23; i++) {
char* p = mi_mallocn_tp(char, 22);
for (int i = 0; i < 22; i++) {
p[i] = '0'+i;
}
puts(p);
@ -286,15 +285,15 @@ static void test_manage_os_memory(void) {
static void test_large_pages(void) {
mi_memid_t memid;
#if 0
#if 0
size_t pages_reserved;
size_t page_size;
uint8_t* p = (uint8_t*)_mi_os_alloc_huge_os_pages(1, -1, 30000, &pages_reserved, &page_size, &memid);
const size_t req_size = pages_reserved * page_size;
#else
#else
const size_t req_size = 64*MI_MiB;
uint8_t* p = (uint8_t*)_mi_os_alloc(req_size,&memid,NULL);
#endif
uint8_t* p = (uint8_t*)_mi_os_alloc(req_size, &memid, NULL);
#endif
p[0] = 1;
@ -317,8 +316,8 @@ static void test_large_pages(void) {
#if 0
#include <stdint.h>
#include <stdbool.h>
#include <mimalloc/bits.h>
#define MI_INTPTR_SIZE 8
#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE)
#define MI_BIN_HUGE 100
@ -370,8 +369,6 @@ uint8_t _mi_bsr(uintptr_t x) {
#endif
}
static inline size_t _mi_wsize_from_size(size_t size) {
return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
}
@ -408,7 +405,9 @@ extern inline uint8_t _mi_bin8(size_t size) {
#endif
wsize--;
// find the highest bit
uint8_t b = mi_bsr32((uint32_t)wsize);
size_t idx;
mi_bsr(wsize, &idx);
uint8_t b = (uint8_t)idx;
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we do not round the first 8 sizes
// which each get an exact bin
@ -440,7 +439,9 @@ static inline uint8_t _mi_bin4(size_t size) {
bin = MI_BIN_HUGE;
}
else {
uint8_t b = mi_bsr32((uint32_t)wsize);
size_t idx;
mi_bsr(wsize, &idx);
uint8_t b = (uint8_t)idx;
bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3;
}
return bin;
@ -456,7 +457,9 @@ static size_t _mi_binx4(size_t wsize) {
bin = (uint8_t)wsize;
}
else {
uint8_t b = mi_bsr32((uint32_t)wsize);
size_t idx;
mi_bsr(wsize, &idx);
uint8_t b = (uint8_t)idx;
if (b <= 1) return wsize;
bin = ((b << 1) | (wsize >> (b - 1))&0x01) + 3;
}
@ -465,7 +468,9 @@ static size_t _mi_binx4(size_t wsize) {
static size_t _mi_binx8(size_t bsize) {
if (bsize<=1) return bsize;
uint8_t b = mi_bsr32((uint32_t)bsize);
size_t idx;
mi_bsr(bsize, &idx);
uint8_t b = (uint8_t)idx;
if (b <= 2) return bsize;
size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5;
return bin;
@ -483,8 +488,10 @@ static inline size_t mi_bin(size_t wsize) {
}
else {
wsize--;
assert(wsize>0);
// find the highest bit
uint8_t b = (uint8_t)mi_bsr32((uint32_t)wsize); // note: wsize != 0
uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize));
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we do not round the first 8 sizes
// which each get an exact bin
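// A self-contained sketch (editor illustration, not mimalloc's exact code) of
// the "top 3 bits" binning idea described above: the highest set bit plus the
// next two bits select the bin, so bins between two powers of two are spaced a
// quarter apart and the worst-case internal slack is roughly an eighth of the
// block size (~12.5%). The `_sketch` names, the -3 offset, and the cut-off for
// exact small bins are illustrative only.
#include <stddef.h>
#include <stdio.h>

static unsigned bsr_sketch(size_t x) {   // index of the highest set bit, x > 0
  unsigned b = 0;
  while (x >>= 1) b++;
  return b;
}

static unsigned bin_of_wsize_sketch(size_t wsize) {
  if (wsize <= 8) return (unsigned)wsize;   // exact bins for the first sizes
  wsize--;                                  // so powers of two stay in the lower bin
  const unsigned b = bsr_sketch(wsize);     // highest bit
  return ((b << 2) | (unsigned)((wsize >> (b - 2)) & 0x03)) - 3;
}

int main(void) {
  // 97..112 words map to one bin; 113 starts the next (16-word spacing at b = 6).
  for (size_t w = 96; w <= 114; w++) {
    printf("wsize %3zu -> bin %u\n", w, bin_of_wsize_sketch(w));
  }
  return 0;
}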


@ -388,7 +388,7 @@ static void test_mt_shutdown()
// issue #372
static void fail_aslr() {
size_t sz = (4ULL << 40); // 4TiB
uint64_t sz = (4ULL << 40); // 4TiB
void* p = malloc(sz);
printf("pointer p: %p: area up to %p\n", p, (uint8_t*)p + sz);
*(int*)0x5FFFFFFF000 = 0; // should segfault


@ -34,7 +34,7 @@ we therefore test the API over various inputs. Please add more tests :-)
#include "mimalloc.h"
// #include "mimalloc/internal.h"
#include "mimalloc/types.h" // for MI_DEBUG and MI_BLOCK_ALIGNMENT_MAX
#include "mimalloc/types.h" // for MI_DEBUG and MI_PAGE_MAX_OVERALLOC_ALIGN
#include "testhelper.h"
@ -169,7 +169,7 @@ int main(void) {
/*
CHECK_BODY("malloc-aligned6") {
bool ok = true;
for (size_t align = 1; align <= MI_BLOCK_ALIGNMENT_MAX && ok; align *= 2) {
for (size_t align = 1; align <= MI_PAGE_MAX_OVERALLOC_ALIGN && ok; align *= 2) {
void* ps[8];
for (int i = 0; i < 8 && ok; i++) {
ps[i] = mi_malloc_aligned(align*13 // size
@ -186,16 +186,16 @@ int main(void) {
};
*/
CHECK_BODY("malloc-aligned7") {
void* p = mi_malloc_aligned(1024,MI_BLOCK_ALIGNMENT_MAX);
void* p = mi_malloc_aligned(1024,MI_PAGE_MAX_OVERALLOC_ALIGN);
mi_free(p);
result = ((uintptr_t)p % MI_BLOCK_ALIGNMENT_MAX) == 0;
result = ((uintptr_t)p % MI_PAGE_MAX_OVERALLOC_ALIGN) == 0;
};
CHECK_BODY("malloc-aligned8") {
bool ok = true;
for (int i = 0; i < 5 && ok; i++) {
int n = (1 << i);
void* p = mi_malloc_aligned(1024, n * MI_BLOCK_ALIGNMENT_MAX);
ok = ((uintptr_t)p % (n*MI_BLOCK_ALIGNMENT_MAX)) == 0;
void* p = mi_malloc_aligned(1024, n * MI_PAGE_MAX_OVERALLOC_ALIGN);
ok = ((uintptr_t)p % (n*MI_PAGE_MAX_OVERALLOC_ALIGN)) == 0;
mi_free(p);
}
result = ok;
@ -203,7 +203,7 @@ int main(void) {
CHECK_BODY("malloc-aligned9") { // test large alignments
bool ok = true;
void* p[8];
size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, 0 };
size_t sizes[8] = { 8, 512, 1024 * 1024, MI_PAGE_MAX_OVERALLOC_ALIGN, MI_PAGE_MAX_OVERALLOC_ALIGN + 1, 2 * MI_PAGE_MAX_OVERALLOC_ALIGN, 8 * MI_PAGE_MAX_OVERALLOC_ALIGN, 0 };
for (int i = 0; i < 28 && ok; i++) {
int align = (1 << i);
for (int j = 0; j < 8 && ok; j++) {


@ -40,6 +40,19 @@ static int ITER = 20;
static int THREADS = 8;
static int SCALE = 10;
static int ITER = 10;
#elif 0
static int THREADS = 4;
static int SCALE = 10;
static int ITER = 20;
#elif 0
static int THREADS = 32;
static int SCALE = 50;
static int ITER = 50;
#elif 0
static int THREADS = 32;
static int SCALE = 25;
static int ITER = 50;
#define ALLOW_LARGE true
#else
static int THREADS = 32; // more repeatable if THREADS <= #processors
static int SCALE = 50; // scaling factor
@ -50,7 +63,12 @@ static int ITER = 50; // N full iterations destructing and re-creating a
#define STRESS // undefine for leak test
static bool allow_large_objects = false; // allow very large objects? (set to `true` if SCALE>100)
#ifndef ALLOW_LARGE
#define ALLOW_LARGE false
#endif
static bool allow_large_objects = ALLOW_LARGE; // allow very large objects? (set to `true` if SCALE>100)
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
static bool main_participates = false; // main thread participates as a worker too
@ -66,7 +84,7 @@ static bool main_participates = false; // main thread participates as a
#define custom_free(p) mi_free(p)
#ifndef NDEBUG
#define HEAP_WALK // walk the heap objects?
#define xHEAP_WALK // walk the heap objects?
#endif
#endif
@ -241,9 +259,21 @@ static void test_stress(void) {
//mi_debug_show_arenas(true);
#endif
#if !defined(NDEBUG) || defined(MI_TSAN)
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
if ((n + 1) % 10 == 0) {
printf("- iterations left: %3d\n", ITER - (n + 1));
mi_debug_show_arenas(true);
//mi_collect(true);
//mi_debug_show_arenas(true);
}
#endif
}
// clean up
for (int i = 0; i < TRANSFERS; i++) {
void* p = atomic_exchange_ptr(&transfer[i], NULL);
if (p != NULL) {
free_items(p);
}
}
}
#ifndef STRESS
@ -274,6 +304,10 @@ int main(int argc, char** argv) {
#endif
#if !defined(NDEBUG) && !defined(USE_STD_MALLOC)
mi_option_set(mi_option_arena_reserve, 32 * 1024 /* in kib = 32MiB */);
//mi_option_set(mi_option_purge_delay,10);
#endif
#if defined(NDEBUG) && !defined(USE_STD_MALLOC)
// mi_option_set(mi_option_purge_delay,-1);
#endif
#ifndef USE_STD_MALLOC
mi_stats_reset();
@ -318,7 +352,7 @@ int main(int argc, char** argv) {
#ifndef NDEBUG
mi_debug_show_arenas(true);
mi_collect(true);
#endif
#endif
#endif
mi_stats_print(NULL);
//bench_end_program();
@ -341,9 +375,10 @@ static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
thread_entry_fun = fun;
DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD));
HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE));
thandles[0] = GetCurrentThread(); // avoid lint warning
const size_t start = (main_participates ? 1 : 0);
for (size_t i = start; i < nthreads; i++) {
thandles[i] = CreateThread(0, 8*1024, &thread_entry, (void*)(i), 0, &tids[i]);
thandles[i] = CreateThread(0, 8*1024L, &thread_entry, (void*)(i), 0, &tids[i]);
}
if (main_participates) fun(0); // run the main thread as well
for (size_t i = start; i < nthreads; i++) {