From 6165177cb1461c49df3d3baca04d0da966544d51 Mon Sep 17 00:00:00 2001
From: Kirsten Lee
Date: Tue, 20 Aug 2019 16:19:16 -0700
Subject: [PATCH 01/40] provide a set of solutions that use the vs2019
 vcruntime

---
 ide/vs2019/mimalloc-override-test.vcxproj | 165 ++++++++++++++
 ide/vs2019/mimalloc-override.vcxproj      | 243 +++++++++++++++++++++
 ide/vs2019/mimalloc-test-stress.vcxproj   | 159 ++++++++++++++
 ide/vs2019/mimalloc-test.vcxproj          | 163 ++++++++++++++
 ide/vs2019/mimalloc.sln                   |  71 ++++++
 ide/vs2019/mimalloc.vcxproj               | 252 ++++++++++++++++++++++
 6 files changed, 1053 insertions(+)
 create mode 100644 ide/vs2019/mimalloc-override-test.vcxproj
 create mode 100644 ide/vs2019/mimalloc-override.vcxproj
 create mode 100644 ide/vs2019/mimalloc-test-stress.vcxproj
 create mode 100644 ide/vs2019/mimalloc-test.vcxproj
 create mode 100644 ide/vs2019/mimalloc.sln
 create mode 100644 ide/vs2019/mimalloc.vcxproj

diff --git a/ide/vs2019/mimalloc-override-test.vcxproj b/ide/vs2019/mimalloc-override-test.vcxproj
new file mode 100644
index 00000000..d75a67e1
--- /dev/null
+++ b/ide/vs2019/mimalloc-override-test.vcxproj
@@ -0,0 +1,165 @@
+<!-- 165 lines of MSBuild XML; the element markup was lost in text extraction.
+     Recoverable settings: Debug/Release x Win32/x64 configurations; ToolsVersion 15.0;
+     ProjectGuid {FEF7868F-750E-4C21-A04D-22707CC66879}; WindowsTargetPlatformVersion 10.0;
+     ConfigurationType Application; PlatformToolset v142 (WholeProgramOptimization in Release);
+     output and intermediate dirs under $(ProjectDir)..\..\out\msvc-$(Platform)\...;
+     include dir ..\..\include; runtime MultiThreadedDebugDLL (Debug) resp. MultiThreadedDLL
+     with _MBCS;NDEBUG (Release); SubSystem Console; links kernel32.lib;user32.lib;gdi32.lib;
+     winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;
+     odbc32.lib;odbccp32.lib; references project {abb5eae7-b3e6-432e-b636-333449892ea7}
+     (mimalloc-override). -->
\ No newline at end of file
diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj
new file mode 100644
index 00000000..1f3b7ee2
--- /dev/null
+++ b/ide/vs2019/mimalloc-override.vcxproj
@@ -0,0 +1,243 @@
+<!-- 243 lines of MSBuild XML; markup lost in extraction. Recoverable settings:
+     ToolsVersion 15.0; ProjectGuid {ABB5EAE7-B3E6-432E-B636-333449892EA7}; name
+     mimalloc-override; WindowsTargetPlatformVersion 10.0; ConfigurationType DynamicLibrary
+     (TargetExt .dll) for all four Debug/Release x Win32/x64 configurations; PlatformToolset
+     v142; output to $(SolutionDir)..\..\out\msvc-$(Platform)\...; include dir ../../include;
+     defines MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;_MBCS (plus NDEBUG in
+     Release); runtime MultiThreadedDebugDLL resp. MultiThreadedDLL; EntryPoint DllEntry;
+     links kernel32.lib; Release writes an AssemblyAndSourceCode listing to $(IntDir);
+     per-file build settings for the override sources (file names not recoverable from
+     the garbled text). -->
\ No newline at end of file
diff --git a/ide/vs2019/mimalloc-test-stress.vcxproj b/ide/vs2019/mimalloc-test-stress.vcxproj
new file mode 100644
index 00000000..6aed1cc1
--- /dev/null
+++ b/ide/vs2019/mimalloc-test-stress.vcxproj
@@ -0,0 +1,159 @@
+<!-- 159 lines of MSBuild XML; markup lost in extraction. Recoverable settings:
+     ToolsVersion 15.0; ProjectGuid {FEF7958F-750E-4C21-A04D-22707CC66878}; name
+     mimalloc-test-stress; WindowsTargetPlatformVersion 10.0; ConfigurationType Application;
+     PlatformToolset v142; Debug/Release x Win32/x64; output under
+     $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\;
+     include dir ..\..\include; NDEBUG in Release; SubSystem Console; references project
+     {abb5eae7-b3e6-432e-b636-333449892ea6} (mimalloc). -->
\ No newline at end of file
diff --git a/ide/vs2019/mimalloc-test.vcxproj b/ide/vs2019/mimalloc-test.vcxproj
new file mode 100644
index 00000000..1e901e45
--- /dev/null
+++ b/ide/vs2019/mimalloc-test.vcxproj
@@ -0,0 +1,163 @@
+<!-- 163 lines of MSBuild XML; markup lost in extraction. Recoverable settings:
+     ToolsVersion 15.0; ProjectGuid {FEF7858F-750E-4C21-A04D-22707CC66878}; RootNamespace
+     mimalloctest; project name mimalloc-test; WindowsTargetPlatformVersion 10.0;
+     ConfigurationType Application; PlatformToolset v142; Debug/Release x Win32/x64;
+     output under $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\;
+     include dir ..\..\include; LanguageStandard stdcpp17; _MBCS;NDEBUG in Release;
+     SubSystem Console; AssemblyAndSourceCode listing; references project
+     {abb5eae7-b3e6-432e-b636-333449892ea6} (mimalloc). -->
\ No newline at end of file
diff --git a/ide/vs2019/mimalloc.sln b/ide/vs2019/mimalloc.sln
new file mode 100644
index 00000000..aeab6b88
--- /dev/null
+++ b/ide/vs2019/mimalloc.sln
@@ -0,0 +1,71 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 15
+VisualStudioVersion = 15.0.28010.2016
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc", "mimalloc.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA6}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test", "mimalloc-test.vcxproj", "{FEF7858F-750E-4C21-A04D-22707CC66878}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override", "mimalloc-override.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA7}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override-test", "mimalloc-override-test.vcxproj", "{FEF7868F-750E-4C21-A04D-22707CC66879}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test-stress", "mimalloc-test-stress.vcxproj", "{FEF7958F-750E-4C21-A04D-22707CC66878}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x64.ActiveCfg = Debug|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x64.Build.0 = Debug|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x86.ActiveCfg = Debug|Win32
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x86.Build.0 = Debug|Win32
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x64.ActiveCfg = Release|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x64.Build.0 = Release|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x86.ActiveCfg = Release|Win32
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x86.Build.0 = Release|Win32
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x64.ActiveCfg = Debug|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x64.Build.0 = Debug|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x86.ActiveCfg = Debug|Win32
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x86.Build.0 = Debug|Win32
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x64.ActiveCfg = Release|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x64.Build.0 = Release|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x86.ActiveCfg = Release|Win32
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x86.Build.0 = Release|Win32
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x64.ActiveCfg = Debug|x64
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x64.Build.0 = Debug|x64
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x86.ActiveCfg = Debug|Win32
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x86.Build.0 = Debug|Win32
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x64.ActiveCfg = Release|x64
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x64.Build.0 = Release|x64
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x86.ActiveCfg = Release|Win32
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x86.Build.0 = Release|Win32
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {4297F93D-486A-4243-995F-7D32F59AE82A}
+	EndGlobalSection
+EndGlobal
diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj
new file mode 100644
index 00000000..4d9563c2
--- /dev/null
+++ b/ide/vs2019/mimalloc.vcxproj
@@ -0,0 +1,252 @@
+<!-- 252 lines of MSBuild XML; markup lost in extraction. Recoverable settings:
+     ToolsVersion 15.0; ProjectGuid {ABB5EAE7-B3E6-432E-B636-333449892EA6}; name mimalloc;
+     WindowsTargetPlatformVersion 10.0; ConfigurationType StaticLibrary (TargetExt .lib)
+     for Debug/Release x Win32/x64; PlatformToolset v142; output to
+     $(SolutionDir)..\..\out\msvc-$(Platform)\...; include dir ../../include; Debug defines
+     MI_DEBUG=3; Release defines NDEBUG with MaxSpeed, AnySuitable inlining, intrinsics
+     disabled (Neither), and an AssemblyAndSourceCode listing to $(IntDir); per-file
+     ClCompile/ClInclude settings for the library sources (file names not recoverable
+     from the garbled text). -->
\ No newline at end of file

From eb25093b13b57cb83113527b7df47fcfb1a427c3 Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Wed, 21 Aug 2019 09:40:57 -0700
Subject: [PATCH 02/40] fix mi_cdecl for older clang versions

---
 include/mimalloc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mimalloc.h b/include/mimalloc.h
index 6615e2e2..9f27e463 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -53,8 +53,8 @@ terms of the MIT license. A copy of the license can be found in the file
 #else
   #define mi_attr_alloc_size(s)       __attribute__((alloc_size(s)))
   #define mi_attr_alloc_size2(s1,s2)  __attribute__((alloc_size(s1,s2)))
-  #define mi_cdecl      // leads to warnings... __attribute__((cdecl))
 #endif
+#define mi_cdecl        // leads to warnings... __attribute__((cdecl))
 #else
 #define mi_decl_thread  __thread
 #define mi_decl_export
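The change above only moves one line, but the reason is subtle: `mi_cdecl` must expand to a calling-convention keyword where one exists and to nothing everywhere else, and older clang versions warn on `__attribute__((cdecl))`. A minimal standalone sketch of the same macro pattern (illustrative names, not mimalloc's actual header):

    /* Sketch only: how a portable calling-convention macro is typically set up. */
    #if defined(_MSC_VER)
      #define my_cdecl __cdecl
    #else
      #define my_cdecl   /* empty: some compilers warn on __attribute__((cdecl)) */
    #endif

    typedef void (my_cdecl my_callback_fun)(void* arg);  /* function-pointer typedef */
    void my_cdecl my_handler(void* arg);                 /* plain declaration */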
From ee475fd8cd6005cecb2de3f7e5b87ceeab609095 Mon Sep 17 00:00:00 2001
From: Jakub Szymanski
Date: Wed, 21 Aug 2019 11:11:36 -0700
Subject: [PATCH 03/40] add warning when no available mem

---
 src/os.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/os.c b/src/os.c
index b15d58d0..bcce5d7d 100644
--- a/src/os.c
+++ b/src/os.c
@@ -257,6 +257,9 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
   if (p == NULL) {
     p = mi_win_virtual_allocx(addr, size, try_alignment, flags);
   }
+  if (p == NULL) {
+    _mi_warning_message("unable to alloc mem error: err: %i size: 0x%x \n", GetLastError(), size);
+  }
   return p;
 }

From 5d3bf1c8440ca300145ab6bb4d09cc3cef6257cc Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 11:22:35 -0700
Subject: [PATCH 04/40] don't commit or reset in huge OS pages

---
 src/os.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/os.c b/src/os.c
index bcce5d7d..9edcd064 100644
--- a/src/os.c
+++ b/src/os.c
@@ -539,7 +539,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative,
   // page align in the range, commit liberally, decommit conservative
   size_t csize;
   void* start = mi_os_page_align_areax(conservative, addr, size, &csize);
-  if (csize == 0) return true;
+  if (csize == 0 || mi_os_is_huge_reserved(addr)) return true;
   int err = 0;
   if (commit) {
     _mi_stat_increase(&stats->committed, csize);
@@ -591,7 +591,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
   // page align conservatively within the range
   size_t csize;
   void* start = mi_os_page_align_area_conservative(addr, size, &csize);
-  if (csize == 0) return true;
+  if (csize == 0 || mi_os_is_huge_reserved(addr)) return true;
   if (reset) _mi_stat_increase(&stats->reset, csize);
   else _mi_stat_decrease(&stats->reset, csize);
   if (!reset) return true; // nothing to do on unreset!
@@ -659,7 +659,9 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) {
   size_t csize = 0;
   void* start = mi_os_page_align_area_conservative(addr, size, &csize);
   if (csize == 0) return false;
-
+  if (mi_os_is_huge_reserved(addr)) {
+    _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
+  }
   int err = 0;
 #ifdef _WIN32
   DWORD oldprotect = 0;
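Patch 04 guards commit/decommit, reset, and mprotect with `mi_os_is_huge_reserved` so memory inside the pre-reserved huge-OS-page area is never touched by those calls. A minimal sketch of such a range guard, assuming a single contiguous reserved area (the globals and function names below are hypothetical stand-ins, not mimalloc's):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    static void*  huge_start = NULL;  /* set once when huge pages are reserved */
    static size_t huge_size  = 0;

    /* true if addr lies inside the reserved huge-page area */
    static bool is_huge_reserved(const void* addr) {
      return (huge_start != NULL &&
              (const uint8_t*)addr >= (const uint8_t*)huge_start &&
              (const uint8_t*)addr <  (const uint8_t*)huge_start + huge_size);
    }

    static bool os_reset(void* addr, size_t size) {
      (void)size;
      if (is_huge_reserved(addr)) return true;  /* never reset/decommit huge OS pages */
      /* ... the real madvise(MADV_FREE) / VirtualAlloc(MEM_RESET) would go here ... */
      return true;
    }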
From a1c5218ff52e2b187936e6f8cd77a4f8733458e8 Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 11:33:06 -0700
Subject: [PATCH 05/40] use 4TiB area on windows 64-bit for aligned allocation

---
 src/os.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/os.c b/src/os.c
index 9edcd064..5d872036 100644
--- a/src/os.c
+++ b/src/os.c
@@ -220,7 +220,18 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment,
       _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) page instead (error %lx)\n", err);
     }
   }
-
+#endif
+#if (MI_INTPTR_SIZE >= 8)
+  // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations
+  static volatile intptr_t aligned_base = ((intptr_t)4 << 40); // starting at 4TiB
+  if (addr == NULL && try_alignment > 0 && try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE) == 0) {
+    intptr_t hint = mi_atomic_add(&aligned_base, size) - size;
+    if (hint%try_alignment == 0) {
+      return VirtualAlloc((void*)hint, size, flags, PAGE_READWRITE);
+    }
+  }
+#endif
+#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
   // on modern Windows try use VirtualAlloc2 for aligned allocation
   if (try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
     MEM_ADDRESS_REQUIREMENTS reqs = { 0 };
@@ -781,7 +792,7 @@ int mi_reserve_huge_os_pages( size_t pages, double max_secs ) mi_attr_noexcept
   // Allocate one page at the time but try to place them contiguously
   // We allocate one page at the time to be able to abort if it takes too long
   double start_t = _mi_clock_start();
-  uint8_t* start = (uint8_t*)((uintptr_t)8 << 40); // 8TiB virtual start address
+  uint8_t* start = (uint8_t*)((uintptr_t)16 << 40); // 16TiB virtual start address
   uint8_t* addr = start;  // current top of the allocations
   for (size_t page = 0; page < pages; page++, addr += MI_HUGE_OS_PAGE_SIZE ) {
     // allocate large pages
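The first hunk reserves the virtual address range starting at 4TiB and hands out 4MiB-aligned hints with a single atomic add; because both the start address and every step are segment-size multiples, each successful hint is automatically aligned. A self-contained sketch of the same idea in C11 atomics (illustrative names; mimalloc uses its own `mi_atomic_add`, which returns the new value rather than the old one):

    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdint.h>

    #define SEG_SIZE ((size_t)4 << 20)                       /* 4MiB segments */

    static _Atomic uintptr_t hint_base = (uintptr_t)4 << 40; /* start at 4TiB */

    /* returns an aligned address hint, or NULL when the size does not fit the scheme */
    static void* next_aligned_hint(size_t size) {
      if (size == 0 || (size % SEG_SIZE) != 0) return NULL;
      uintptr_t hint = atomic_fetch_add(&hint_base, size);   /* old value = our slot */
      return ((hint % SEG_SIZE) == 0) ? (void*)hint : NULL;
    }

The hint is merely passed to VirtualAlloc as a preferred address; if the OS cannot satisfy it, allocation falls through to the unaligned path.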
From b51130970986dac99959f7dca1164bcfc2296a5f Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 13:44:43 -0700
Subject: [PATCH 06/40] ensure volatile declaration for abandoned_next field

---
 include/mimalloc-types.h |  2 +-
 src/page-queue.c         |  1 +
 src/segment.c            | 16 +++++++++-------
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index c0778f87..dd1f05e3 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -204,7 +204,7 @@ typedef enum mi_page_kind_e {
 typedef struct mi_segment_s {
   struct mi_segment_s* next;
   struct mi_segment_s* prev;
-  struct mi_segment_s* abandoned_next;
+  volatile struct mi_segment_s* abandoned_next;
   size_t          abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
   size_t          used;      // count of pages in use (`used <= capacity`)
   size_t          capacity;  // count of available pages (`#free + used`)
diff --git a/src/page-queue.c b/src/page-queue.c
index e476403b..859b1d57 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -130,6 +130,7 @@ extern inline uint8_t _mi_bin(size_t size) {
     // - adjust with 3 because we use do not round the first 8 sizes
     //   which each get an exact bin
     bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
+    mi_assert_internal(bin < MI_BIN_HUGE);
   }
   mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE);
   return bin;
diff --git a/src/segment.c b/src/segment.c
index d5a2288a..6379b24a 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -558,13 +558,15 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_segment_remove_from_free_queue(segment,tld);
   mi_assert_internal(segment->next == NULL && segment->prev == NULL);
   // all pages in the segment are abandoned; add it to the abandoned list
-  segment->thread_id = 0;
-  do {
-    segment->abandoned_next = (mi_segment_t*)abandoned;
-  } while (!mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment, segment->abandoned_next));
-  mi_atomic_increment(&abandoned_count);
-  _mi_stat_increase(&tld->stats->segments_abandoned,1);
+  _mi_stat_increase(&tld->stats->segments_abandoned, 1);
   mi_segments_track_size(-((long)segment->segment_size), tld);
+  segment->thread_id = 0;
+  mi_segment_t* next;
+  do {
+    next = (mi_segment_t*)abandoned;
+    mi_atomic_write_ptr((volatile void**)&segment->abandoned_next, next);
+  } while (!mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment, next));
+  mi_atomic_increment(&abandoned_count);
 }

 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
@@ -598,7 +600,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
     mi_segment_t* segment;
     do {
      segment = (mi_segment_t*)abandoned;
-    } while(segment != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment->abandoned_next, segment));
+    } while(segment != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, (mi_segment_t*)segment->abandoned_next, segment));
    if (segment==NULL) break; // stop early if no more segments available

    // got it.
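The rewritten `mi_segment_abandon` is a classic lock-free stack push: write the node's `next` pointer first, then publish the node with a compare-and-swap on the shared head, retrying with a freshly read head on failure. A standalone sketch using C11 atomics (illustrative types, not mimalloc's):

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct node_s {
      struct node_s* next;
    } node_t;

    static node_t* _Atomic stack_head = NULL;

    static void stack_push(node_t* n) {
      node_t* old = atomic_load(&stack_head);
      do {
        n->next = old;               /* publish next BEFORE the CAS makes n visible */
      } while (!atomic_compare_exchange_weak(&stack_head, &old, n));
    }

    /* pop is what _mi_segment_try_reclaim_abandoned does; note that a
       production version must also consider the ABA problem */
    static node_t* stack_pop(void) {
      node_t* n = atomic_load(&stack_head);
      while (n != NULL && !atomic_compare_exchange_weak(&stack_head, &n, n->next)) { }
      return n;
    }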
From acde83543f71f8fda793df196236c62d1864c04a Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 14:08:00 -0700
Subject: [PATCH 07/40] remove threadid from pages and keep page flags separate

---
 include/mimalloc-internal.h | 25 +++++--------------------
 include/mimalloc-types.h    | 30 ++++++++++++++++--------------
 src/alloc.c                 | 10 +++++-----
 src/init.c                  |  9 +++++----
 src/os.c                    |  2 +-
 src/page.c                  |  1 -
 src/segment.c               | 18 +++++------------
 7 files changed, 37 insertions(+), 58 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 0bdc1ab6..fa157b76 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -317,39 +317,24 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size)
 }

+
 //-----------------------------------------------------------
 // Page flags
 //-----------------------------------------------------------
-static inline uintptr_t mi_page_thread_id(const mi_page_t* page) {
-  return (page->flags & ~MI_PAGE_FLAGS_MASK);
-}
-
-static inline void mi_page_init_flags(mi_page_t* page, uintptr_t thread_id) {
-  mi_assert_internal((thread_id & MI_PAGE_FLAGS_MASK) == 0);
-  page->flags = thread_id;
-}
-
-static inline void mi_page_set_thread_id(mi_page_t* page, uintptr_t thread_id) {
-  mi_assert_internal((thread_id & MI_PAGE_FLAGS_MASK) == 0);
-  page->flags = thread_id | (page->flags & MI_PAGE_FLAGS_MASK);
-}
-
 static inline bool mi_page_is_in_full(const mi_page_t* page) {
-  return ((page->flags & 0x01) != 0);
+  return page->flags.in_full;
 }

 static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
-  if (in_full) page->flags |= 0x01;
-  else page->flags &= ~0x01;
+  page->flags.in_full = in_full;
 }

 static inline bool mi_page_has_aligned(const mi_page_t* page) {
-  return ((page->flags & 0x02) != 0);
+  return page->flags.has_aligned;
 }

 static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
-  if (has_aligned) page->flags |= 0x02;
-  else page->flags &= ~0x02;
+  page->flags.has_aligned = has_aligned;
 }
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index dd1f05e3..c20b663a 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -124,12 +124,15 @@ typedef enum mi_delayed_e {
 } mi_delayed_t;

-// Use the bottom 2 bits for the `in_full` and `has_aligned` flags
-// and the rest for the threadid (we assume tid's never use those lower 2 bits).
-// This allows a single test in `mi_free` to check for unlikely cases
-// (namely, non-local free, aligned free, or freeing in a full page)
-#define MI_PAGE_FLAGS_MASK  ((uintptr_t)0x03)
-typedef uintptr_t mi_page_flags_t;
+// The `in_full` and `has_aligned` page flags are put in a union to efficiently
+// test if both are false (`value == 0`) in the `mi_free` routine.
+typedef union mi_page_flags_u {
+  uint16_t value;
+  struct {
+    bool in_full;
+    bool has_aligned;
+  };
+} mi_page_flags_t;

 // Thread free list.
 // We use the bottom 2 bits of the pointer for mi_delayed_t flags
@@ -163,12 +166,12 @@ typedef struct mi_page_s {
   // layout like this to optimize access in `mi_malloc` and `mi_free`
   uint16_t              capacity;  // number of blocks committed
   uint16_t              reserved;  // number of blocks reserved in memory
-  // 16 bits padding
+  mi_page_flags_t       flags;     // `in_full` and `has_aligned` flags (16 bits)
+
   mi_block_t*           free;      // list of available free blocks (`malloc` allocates from this list)
   #if MI_SECURE
   uintptr_t             cookie;    // random cookie to encode the free lists
   #endif
-  mi_page_flags_t       flags;     // threadid:62 | has_aligned:1 | in_full:1
   size_t                used;      // number of blocks in use (including blocks in `local_free` and `thread_free`)

   mi_block_t*           local_free; // list of deferred free blocks by this thread (migrates to `free`)
@@ -181,12 +184,11 @@ typedef struct mi_page_s {
   struct mi_page_s*     next;      // next page owned by this thread with the same `block_size`
   struct mi_page_s*     prev;      // previous page owned by this thread with the same `block_size`

-// improve page index calculation
-#if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
-  void*                 padding[1]; // 12 words on 64-bit
-#elif MI_INTPTR_SIZE==4
-  // void*              padding[1]; // 12 words on 32-bit
-#endif
+  // improve page index calculation
+  // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds one word
+  #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0)
+  void*                 padding[1]; // 12 words on 64-bit in secure mode, 12 words on 32-bit plain
+  #endif
 } mi_page_t;
diff --git a/src/alloc.c b/src/alloc.c
index 9be2ef40..b7881ea5 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -225,19 +225,19 @@ void mi_free(void* p) mi_attr_noexcept
   }
 #endif

+  const uintptr_t tid = _mi_thread_id();
   mi_page_t* const page = _mi_segment_page_of(segment, p);

 #if (MI_STAT>1)
   mi_heap_t* heap = mi_heap_get_default();
-  mi_heap_stat_decrease( heap, malloc, mi_usable_size(p));
+  mi_heap_stat_decrease(heap, malloc, mi_usable_size(p));
   if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) {
-    mi_heap_stat_decrease( heap, normal[_mi_bin(page->block_size)], 1);
+    mi_heap_stat_decrease(heap, normal[_mi_bin(page->block_size)], 1);
   }
   // huge page stat is accounted for in `_mi_page_retire`
 #endif

-  const uintptr_t tid = _mi_thread_id();
-  if (mi_likely(tid == page->flags)) { // if equal, the thread id matches and it is not a full page, nor has aligned blocks
+  if (mi_likely(tid == segment->thread_id && page->flags.value == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
     // local, and not full or aligned
     mi_block_t* block = (mi_block_t*)p;
     mi_block_set_next(page, block, page->local_free);
@@ -247,7 +247,7 @@ void mi_free(void* p) mi_attr_noexcept
   }
   else {
     // non-local, aligned blocks, or a full page; use the more generic path
-    mi_free_generic(segment, page, tid == mi_page_thread_id(page), p);
+    mi_free_generic(segment, page, tid == segment->thread_id, p);
   }
 }
diff --git a/src/init.c b/src/init.c
index a2e7a9d2..ceb84433 100644
--- a/src/init.c
+++ b/src/init.c
@@ -13,15 +13,16 @@ terms of the MIT license. A copy of the license can be found in the file
 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
   0, false, false, false, 0, 0,
+  { 0 },
   NULL,      // free
   #if MI_SECURE
   0,
   #endif
-  0, 0,      // flags, used
+  0,         // used
   NULL, 0, 0,
   0, NULL, NULL, NULL
-  #if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
-  , { NULL }
+  #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0)
+  , { NULL } // padding
   #endif
 };
@@ -350,7 +351,7 @@ void mi_thread_init(void) mi_attr_noexcept
     pthread_setspecific(mi_pthread_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_pthread_done` is called
   #endif

-  #if (MI_DEBUG>0) // not in release mode as that leads to crashes on Windows dynamic override
+  #if (MI_DEBUG>0) && !defined(NDEBUG)  // not in release mode as that leads to crashes on Windows dynamic override
   _mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
   #endif
 }
diff --git a/src/os.c b/src/os.c
index 5d872036..b39c667a 100644
--- a/src/os.c
+++ b/src/os.c
@@ -217,7 +217,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment,
     }
     else {
       // else fall back to regular large OS pages
-      _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) page instead (error %lx)\n", err);
+      _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %lx)\n", err);
     }
   }
 #endif
diff --git a/src/page.c b/src/page.c
index 049ce10a..2da75119 100644
--- a/src/page.c
+++ b/src/page.c
@@ -75,7 +75,6 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
   mi_segment_t* segment = _mi_page_segment(page);
   uint8_t* start = _mi_page_start(segment,page,NULL);
   mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL));
-  mi_assert_internal(segment->thread_id==0 || segment->thread_id == mi_page_thread_id(page));
   //mi_assert_internal(start + page->capacity*page->block_size == page->top);
   mi_assert_internal(mi_page_list_is_valid(page,page->free));
diff --git a/src/segment.c b/src/segment.c
index 6379b24a..b1a5221c 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -549,14 +549,11 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(segment->used > 0);
   mi_assert_internal(segment->abandoned_next == NULL);
   mi_assert_expensive(mi_segment_is_valid(segment));
-#if MI_DEBUG>1
-  for (size_t i = 0; i < segment->capacity; i++) {
-    mi_assert_internal(!segment->pages[i].segment_in_use || mi_page_thread_id(&segment->pages[i]) == 0);
-  }
-#endif
+
   // remove the segment from the free page queue if needed
   mi_segment_remove_from_free_queue(segment,tld);
   mi_assert_internal(segment->next == NULL && segment->prev == NULL);
+
   // all pages in the segment are abandoned; add it to the abandoned list
   _mi_stat_increase(&tld->stats->segments_abandoned, 1);
   mi_segments_track_size(-((long)segment->segment_size), tld);
@@ -570,11 +567,10 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
 }

 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
-  mi_assert(page != NULL && mi_page_thread_id(page) != 0);
+  mi_assert(page != NULL);
   mi_segment_t* segment = _mi_page_segment(page);
   mi_assert_expensive(mi_segment_is_valid(segment));
-  segment->abandoned++;
-  mi_page_set_thread_id(page, 0);
+  segment->abandoned++;
   _mi_stat_increase(&tld->stats->pages_abandoned, 1);
   mi_assert_internal(segment->abandoned <= segment->used);
   if (segment->used == segment->abandoned) {
@@ -626,7 +622,6 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
       }
       else {
         // otherwise reclaim it
-        mi_page_set_thread_id(page,segment->thread_id);
         _mi_page_reclaim(heap,page);
       }
     }
@@ -656,8 +651,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
 static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(mi_segment_has_free(segment));
   mi_page_t* page = mi_segment_find_free(segment, tld->stats);
-  page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
+  page->segment_in_use = true;
   segment->used++;
   mi_assert_internal(segment->used <= segment->capacity);
   if (segment->used == segment->capacity) {
@@ -697,7 +691,6 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
   segment->used = 1;
   mi_page_t* page = &segment->pages[0];
   page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
   return page;
 }
@@ -709,7 +702,6 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld,
   segment->used = 1;
   mi_page_t* page = &segment->pages[0];
   page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
   return page;
 }
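The new `mi_page_flags_t` overlays two byte-sized booleans with one 16-bit value so `mi_free` can test "not full and no aligned blocks" with a single compare against zero. A compilable sketch of the trick (illustrative names, not the library's header):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef union flags_u {
      uint16_t value;          /* both flags viewed as one word */
      struct {
        bool in_full;          /* one byte */
        bool has_aligned;      /* one byte */
      };
    } flags_t;

    int main(void) {
      flags_t f = { 0 };
      assert(f.value == 0);    /* fast path: neither flag set, one compare */
      f.has_aligned = true;
      assert(f.value != 0);    /* any set flag makes the word non-zero */
      f.has_aligned = false;
      f.in_full = true;
      assert(f.value != 0);
      return 0;
    }

Reading `value` after writing the members is the usual C union type-pun; since `bool` stores 0 or 1 in one byte and the union is zero-initialized, `value == 0` holds exactly when both flags are false.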
From 25ea9cf142548a65c5109798706bcc872886d93b Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 21:38:45 -0700
Subject: [PATCH 08/40] on windows use 4TiB area for aligned allocation

---
 src/os.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/os.c b/src/os.c
index 7afe447e..e7ed57b5 100644
--- a/src/os.c
+++ b/src/os.c
@@ -184,6 +184,18 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
 #ifdef _WIN32
 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
+#if (MI_INTPTR_SIZE >= 8)
+  // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations
+  static volatile intptr_t aligned_base = ((intptr_t)4 << 40); // starting at 4TiB
+  if (addr == NULL && try_alignment > 0 &&
+      try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE) == 0)
+  {
+    intptr_t hint = mi_atomic_add(&aligned_base, size) - size;
+    if (hint%try_alignment == 0) {
+      return VirtualAlloc((void*)hint, size, flags, PAGE_READWRITE);
+    }
+  }
+#endif
 #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
   if (try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
     // on modern Windows try use VirtualAlloc2 for aligned allocation
From 15552eba790e7a7e6d8477236c7c51fdb9288ee0 Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 13:44:43 -0700
Subject: [PATCH 09/40] ensure volatile declaration for abandoned_next field

---
 include/mimalloc-types.h |  2 +-
 src/page-queue.c         |  1 +
 src/segment.c            | 16 +++++++++-------
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index c0778f87..dd1f05e3 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -204,7 +204,7 @@ typedef enum mi_page_kind_e {
 typedef struct mi_segment_s {
   struct mi_segment_s* next;
   struct mi_segment_s* prev;
-  struct mi_segment_s* abandoned_next;
+  volatile struct mi_segment_s* abandoned_next;
   size_t          abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
   size_t          used;      // count of pages in use (`used <= capacity`)
   size_t          capacity;  // count of available pages (`#free + used`)
diff --git a/src/page-queue.c b/src/page-queue.c
index e476403b..859b1d57 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -130,6 +130,7 @@ extern inline uint8_t _mi_bin(size_t size) {
     // - adjust with 3 because we use do not round the first 8 sizes
     //   which each get an exact bin
     bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
+    mi_assert_internal(bin < MI_BIN_HUGE);
   }
   mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE);
   return bin;
diff --git a/src/segment.c b/src/segment.c
index d5a2288a..6379b24a 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -558,13 +558,15 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_segment_remove_from_free_queue(segment,tld);
   mi_assert_internal(segment->next == NULL && segment->prev == NULL);
   // all pages in the segment are abandoned; add it to the abandoned list
-  segment->thread_id = 0;
-  do {
-    segment->abandoned_next = (mi_segment_t*)abandoned;
-  } while (!mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment, segment->abandoned_next));
-  mi_atomic_increment(&abandoned_count);
-  _mi_stat_increase(&tld->stats->segments_abandoned,1);
+  _mi_stat_increase(&tld->stats->segments_abandoned, 1);
   mi_segments_track_size(-((long)segment->segment_size), tld);
+  segment->thread_id = 0;
+  mi_segment_t* next;
+  do {
+    next = (mi_segment_t*)abandoned;
+    mi_atomic_write_ptr((volatile void**)&segment->abandoned_next, next);
+  } while (!mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment, next));
+  mi_atomic_increment(&abandoned_count);
 }

 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
@@ -598,7 +600,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
     mi_segment_t* segment;
     do {
      segment = (mi_segment_t*)abandoned;
-    } while(segment != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment->abandoned_next, segment));
+    } while(segment != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, (mi_segment_t*)segment->abandoned_next, segment));
    if (segment==NULL) break; // stop early if no more segments available

    // got it.
From 6c6fcad242ebedba6ee07cff2d255457eb811bb8 Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 14:08:00 -0700
Subject: [PATCH 10/40] remove threadid from pages and keep page flags separate
 (cherry picked)

---
 include/mimalloc-internal.h | 25 +++++--------------------
 include/mimalloc-types.h    | 30 ++++++++++++++++--------------
 src/alloc.c                 | 10 +++++-----
 src/init.c                  |  9 +++++----
 src/page.c                  |  1 -
 src/segment.c               | 18 +++++------------
 6 files changed, 36 insertions(+), 57 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 6455d57e..d886bcec 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -315,39 +315,24 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size)
 }

+
 //-----------------------------------------------------------
 // Page flags
 //-----------------------------------------------------------
-static inline uintptr_t mi_page_thread_id(const mi_page_t* page) {
-  return (page->flags & ~MI_PAGE_FLAGS_MASK);
-}
-
-static inline void mi_page_init_flags(mi_page_t* page, uintptr_t thread_id) {
-  mi_assert_internal((thread_id & MI_PAGE_FLAGS_MASK) == 0);
-  page->flags = thread_id;
-}
-
-static inline void mi_page_set_thread_id(mi_page_t* page, uintptr_t thread_id) {
-  mi_assert_internal((thread_id & MI_PAGE_FLAGS_MASK) == 0);
-  page->flags = thread_id | (page->flags & MI_PAGE_FLAGS_MASK);
-}
-
 static inline bool mi_page_is_in_full(const mi_page_t* page) {
-  return ((page->flags & 0x01) != 0);
+  return page->flags.in_full;
 }

 static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
-  if (in_full) page->flags |= 0x01;
-  else page->flags &= ~0x01;
+  page->flags.in_full = in_full;
 }

 static inline bool mi_page_has_aligned(const mi_page_t* page) {
-  return ((page->flags & 0x02) != 0);
+  return page->flags.has_aligned;
 }

 static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
-  if (has_aligned) page->flags |= 0x02;
-  else page->flags &= ~0x02;
+  page->flags.has_aligned = has_aligned;
 }
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index dd1f05e3..c20b663a 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -124,12 +124,15 @@ typedef enum mi_delayed_e {
 } mi_delayed_t;

-// Use the bottom 2 bits for the `in_full` and `has_aligned` flags
-// and the rest for the threadid (we assume tid's never use those lower 2 bits).
-// This allows a single test in `mi_free` to check for unlikely cases
-// (namely, non-local free, aligned free, or freeing in a full page)
-#define MI_PAGE_FLAGS_MASK  ((uintptr_t)0x03)
-typedef uintptr_t mi_page_flags_t;
+// The `in_full` and `has_aligned` page flags are put in a union to efficiently
+// test if both are false (`value == 0`) in the `mi_free` routine.
+typedef union mi_page_flags_u {
+  uint16_t value;
+  struct {
+    bool in_full;
+    bool has_aligned;
+  };
+} mi_page_flags_t;

 // Thread free list.
 // We use the bottom 2 bits of the pointer for mi_delayed_t flags
@@ -163,12 +166,12 @@ typedef struct mi_page_s {
   // layout like this to optimize access in `mi_malloc` and `mi_free`
   uint16_t              capacity;  // number of blocks committed
   uint16_t              reserved;  // number of blocks reserved in memory
-  // 16 bits padding
+  mi_page_flags_t       flags;     // `in_full` and `has_aligned` flags (16 bits)
+
   mi_block_t*           free;      // list of available free blocks (`malloc` allocates from this list)
   #if MI_SECURE
   uintptr_t             cookie;    // random cookie to encode the free lists
   #endif
-  mi_page_flags_t       flags;     // threadid:62 | has_aligned:1 | in_full:1
   size_t                used;      // number of blocks in use (including blocks in `local_free` and `thread_free`)

   mi_block_t*           local_free; // list of deferred free blocks by this thread (migrates to `free`)
@@ -181,12 +184,11 @@ typedef struct mi_page_s {
   struct mi_page_s*     next;      // next page owned by this thread with the same `block_size`
   struct mi_page_s*     prev;      // previous page owned by this thread with the same `block_size`

-// improve page index calculation
-#if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
-  void*                 padding[1]; // 12 words on 64-bit
-#elif MI_INTPTR_SIZE==4
-  // void*              padding[1]; // 12 words on 32-bit
-#endif
+  // improve page index calculation
+  // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds one word
+  #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0)
+  void*                 padding[1]; // 12 words on 64-bit in secure mode, 12 words on 32-bit plain
+  #endif
 } mi_page_t;
diff --git a/src/alloc.c b/src/alloc.c
index 9be2ef40..b7881ea5 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -225,19 +225,19 @@ void mi_free(void* p) mi_attr_noexcept
   }
 #endif

+  const uintptr_t tid = _mi_thread_id();
   mi_page_t* const page = _mi_segment_page_of(segment, p);

 #if (MI_STAT>1)
   mi_heap_t* heap = mi_heap_get_default();
-  mi_heap_stat_decrease( heap, malloc, mi_usable_size(p));
+  mi_heap_stat_decrease(heap, malloc, mi_usable_size(p));
   if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) {
-    mi_heap_stat_decrease( heap, normal[_mi_bin(page->block_size)], 1);
+    mi_heap_stat_decrease(heap, normal[_mi_bin(page->block_size)], 1);
   }
   // huge page stat is accounted for in `_mi_page_retire`
 #endif

-  const uintptr_t tid = _mi_thread_id();
-  if (mi_likely(tid == page->flags)) { // if equal, the thread id matches and it is not a full page, nor has aligned blocks
+  if (mi_likely(tid == segment->thread_id && page->flags.value == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
     // local, and not full or aligned
     mi_block_t* block = (mi_block_t*)p;
     mi_block_set_next(page, block, page->local_free);
@@ -247,7 +247,7 @@ void mi_free(void* p) mi_attr_noexcept
   }
   else {
     // non-local, aligned blocks, or a full page; use the more generic path
-    mi_free_generic(segment, page, tid == mi_page_thread_id(page), p);
+    mi_free_generic(segment, page, tid == segment->thread_id, p);
   }
 }
diff --git a/src/init.c b/src/init.c
index ec64def8..76e586f2 100644
--- a/src/init.c
+++ b/src/init.c
@@ -13,15 +13,16 @@ terms of the MIT license. A copy of the license can be found in the file
 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
   0, false, false, false, 0, 0,
+  { 0 },
   NULL,      // free
   #if MI_SECURE
   0,
   #endif
-  0, 0,      // flags, used
+  0,         // used
   NULL, 0, 0,
   0, NULL, NULL, NULL
-  #if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
-  , { NULL }
+  #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0)
+  , { NULL } // padding
   #endif
 };
@@ -350,7 +351,7 @@ void mi_thread_init(void) mi_attr_noexcept
     pthread_setspecific(mi_pthread_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_pthread_done` is called
   #endif

-  #if (MI_DEBUG>0) // not in release mode as that leads to crashes on Windows dynamic override
+  #if (MI_DEBUG>0) && !defined(NDEBUG)  // not in release mode as that leads to crashes on Windows dynamic override
   _mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
   #endif
 }
diff --git a/src/page.c b/src/page.c
index 549ced38..a7b4a760 100644
--- a/src/page.c
+++ b/src/page.c
@@ -75,7 +75,6 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
   mi_segment_t* segment = _mi_page_segment(page);
   uint8_t* start = _mi_page_start(segment,page,NULL);
   mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL));
-  mi_assert_internal(segment->thread_id==0 || segment->thread_id == mi_page_thread_id(page));
   //mi_assert_internal(start + page->capacity*page->block_size == page->top);
   mi_assert_internal(mi_page_list_is_valid(page,page->free));
diff --git a/src/segment.c b/src/segment.c
index 6379b24a..b1a5221c 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -549,14 +549,11 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(segment->used > 0);
   mi_assert_internal(segment->abandoned_next == NULL);
   mi_assert_expensive(mi_segment_is_valid(segment));
-#if MI_DEBUG>1
-  for (size_t i = 0; i < segment->capacity; i++) {
-    mi_assert_internal(!segment->pages[i].segment_in_use || mi_page_thread_id(&segment->pages[i]) == 0);
-  }
-#endif
+
   // remove the segment from the free page queue if needed
   mi_segment_remove_from_free_queue(segment,tld);
   mi_assert_internal(segment->next == NULL && segment->prev == NULL);
+
   // all pages in the segment are abandoned; add it to the abandoned list
   _mi_stat_increase(&tld->stats->segments_abandoned, 1);
   mi_segments_track_size(-((long)segment->segment_size), tld);
@@ -570,11 +567,10 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
 }

 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
-  mi_assert(page != NULL && mi_page_thread_id(page) != 0);
+  mi_assert(page != NULL);
   mi_segment_t* segment = _mi_page_segment(page);
   mi_assert_expensive(mi_segment_is_valid(segment));
-  segment->abandoned++;
-  mi_page_set_thread_id(page, 0);
+  segment->abandoned++;
   _mi_stat_increase(&tld->stats->pages_abandoned, 1);
   mi_assert_internal(segment->abandoned <= segment->used);
   if (segment->used == segment->abandoned) {
@@ -626,7 +622,6 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
       }
       else {
         // otherwise reclaim it
-        mi_page_set_thread_id(page,segment->thread_id);
         _mi_page_reclaim(heap,page);
       }
     }
@@ -656,8 +651,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
 static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(mi_segment_has_free(segment));
   mi_page_t* page = mi_segment_find_free(segment, tld->stats);
-  page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
+  page->segment_in_use = true;
   segment->used++;
   mi_assert_internal(segment->used <= segment->capacity);
   if (segment->used == segment->capacity) {
@@ -697,7 +691,6 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
   segment->used = 1;
   mi_page_t* page = &segment->pages[0];
   page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
   return page;
 }
@@ -709,7 +702,6 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld,
   segment->used = 1;
   mi_page_t* page = &segment->pages[0];
   page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
   return page;
 }
From d04d379f66080c9a1e0618e2d834b455b7c98bd1 Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 21:44:07 -0700
Subject: [PATCH 11/40] fix merge conflicts

---
 src/os.c | 16 +++------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/src/os.c b/src/os.c
index 403f9766..a1b6cdf3 100644
--- a/src/os.c
+++ b/src/os.c
@@ -196,18 +196,6 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
 #ifdef _WIN32
 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
-#if (MI_INTPTR_SIZE >= 8)
-  // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations
-  static volatile intptr_t aligned_base = ((intptr_t)4 << 40); // starting at 4TiB
-  if (addr == NULL && try_alignment > 0 &&
-      try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE) == 0)
-  {
-    intptr_t hint = mi_atomic_add(&aligned_base, size) - size;
-    if (hint%try_alignment == 0) {
-      return VirtualAlloc((void*)hint, size, flags, PAGE_READWRITE);
-    }
-  }
-#endif
 #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
   // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
   if ((size % (uintptr_t)1 << 30) == 0 /* 1GiB multiple */
@@ -236,7 +224,9 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
 #if (MI_INTPTR_SIZE >= 8)
   // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations
   static volatile intptr_t aligned_base = ((intptr_t)4 << 40); // starting at 4TiB
-  if (addr == NULL && try_alignment > 0 && try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE) == 0) {
+  if (addr == NULL && try_alignment > 0 &&
+      try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE) == 0)
+  {
     intptr_t hint = mi_atomic_add(&aligned_base, size) - size;
     if (hint%try_alignment == 0) {
       return VirtualAlloc((void*)hint, size, flags, PAGE_READWRITE);

From 99e071cf2c3c3557c2e755911f69b7372c66fe09 Mon Sep 17 00:00:00 2001
From: Jim Huang
Date: Sat, 24 Aug 2019 21:24:56 +0800
Subject: [PATCH 12/40] Link with -lrt for older glibc

Quoted from the Linux Programmer's Manual (2017-09-15):

    #include <time.h>

    int clock_gettime(clockid_t clk_id, struct timespec *tp);

    Link with -lrt (only for glibc versions before 2.17).

This patch adds additional checks for librt availability and appends it
to target_link_libraries accordingly. librt is absent on macOS.

Fixed #139
---
 CMakeLists.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c9de8618..d44b3408 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -108,6 +108,10 @@ if(WIN32)
   list(APPEND mi_libraries psapi shell32 user32)
 else()
   list(APPEND mi_libraries pthread)
+  find_library(LIBRT rt)
+  if(LIBRT)
+    list(APPEND mi_libraries ${LIBRT})
+  endif()
 endif()

 # -----------------------------------------------------------------------------
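For reference, the call that motivates the check: `clock_gettime` lived in librt before glibc 2.17, so a program like the one below needs `-lrt` on those systems and nothing extra on newer glibc or on macOS:

    #include <stdio.h>
    #include <time.h>

    int main(void) {
      struct timespec ts;
      if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {  /* the librt symbol */
        printf("monotonic: %ld.%09ld\n", (long)ts.tv_sec, (long)ts.tv_nsec);
      }
      return 0;
    }

The `find_library`/`if(LIBRT)` pattern links librt only where it actually exists, which keeps the same CMakeLists.txt working on both old glibc and macOS.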
From 23812cc0ac0e37fb2c123f1d391aecdfc372fbfc Mon Sep 17 00:00:00 2001
From: daan
Date: Sat, 24 Aug 2019 15:45:14 -0700
Subject: [PATCH 13/40] do not keep a queue of huge pages and free them
 directly

---
 src/alloc.c      | 15 +++++++++++++++
 src/page-queue.c |  1 +
 src/page.c       | 30 ++++++++++++++++------------
 src/segment.c    |  5 +++--
 4 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/src/alloc.c b/src/alloc.c
index b7881ea5..76e093e7 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -115,6 +115,21 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block)
   mi_thread_free_t tfreex;
   bool use_delayed;

+  mi_segment_t* segment = _mi_page_segment(page);
+  if (segment->page_kind==MI_PAGE_HUGE) {
+    // huge page segments are always abandoned and can be freed immediately
+    mi_assert_internal(segment->thread_id==0);
+    mi_assert_internal(segment->abandoned_next==NULL);
+    // claim it and free
+    mi_block_set_next(page, block, page->free);
+    page->free = block;
+    page->used--;
+    mi_heap_t* heap = mi_get_default_heap();
+    segment->thread_id = heap->thread_id;
+    _mi_segment_page_free(page,true,&heap->tld->segments);
+    return;
+  }
+
   do {
     tfree = page->thread_free;
     use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE ||
diff --git a/src/page-queue.c b/src/page-queue.c
index 859b1d57..d613095f 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -268,6 +268,7 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
 static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
   mi_assert_internal(page->heap == NULL);
   mi_assert_internal(!mi_page_queue_contains(queue, page));
+  mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
   mi_assert_internal(page->block_size == queue->block_size ||
                      (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) ||
                      (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
diff --git a/src/page.c b/src/page.c
index a7b4a760..f7e0ce2c 100644
--- a/src/page.c
+++ b/src/page.c
@@ -98,11 +98,13 @@ bool _mi_page_is_valid(mi_page_t* page) {
 #endif
   if (page->heap!=NULL) {
     mi_segment_t* segment = _mi_page_segment(page);
-    mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id);
-    mi_page_queue_t* pq = mi_page_queue_of(page);
-    mi_assert_internal(mi_page_queue_contains(pq, page));
-    mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page));
-    mi_assert_internal(mi_heap_contains_queue(page->heap,pq));
+    mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id || segment->thread_id==0);
+    if (segment->page_kind != MI_PAGE_HUGE) {
+      mi_page_queue_t* pq = mi_page_queue_of(page);
+      mi_assert_internal(mi_page_queue_contains(pq, page));
+      mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page));
+      mi_assert_internal(mi_heap_contains_queue(page->heap,pq));
+    }
   }
   return true;
 }
@@ -204,6 +206,7 @@ void _mi_page_free_collect(mi_page_t* page) {
 void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
   mi_assert_expensive(mi_page_is_valid_init(page));
   mi_assert_internal(page->heap == NULL);
+  mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
   _mi_page_free_collect(page);
   mi_page_queue_t* pq = mi_page_queue(heap, page->block_size);
   mi_page_queue_push(heap, pq, page);
@@ -212,12 +215,13 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {

 // allocate a fresh page from a segment
 static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) {
-  mi_assert_internal(mi_heap_contains_queue(heap, pq));
+  mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq));
   mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os);
   if (page == NULL) return NULL;
+  mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
   mi_page_init(heap, page, block_size, &heap->tld->stats);
   _mi_stat_increase( &heap->tld->stats.pages, 1);
-  mi_page_queue_push(heap, pq, page);
+  if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL
   mi_assert_expensive(_mi_page_is_valid(page));
   return page;
 }
@@ -699,13 +703,15 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept {
 // A huge page is allocated directly without being in a queue
 static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
   size_t block_size = _mi_wsize_from_size(size) * sizeof(uintptr_t);
-  mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE);
-  mi_page_queue_t* pq = mi_page_queue(heap,block_size);
-  mi_assert_internal(mi_page_queue_is_huge(pq));
-  mi_page_t* page = mi_page_fresh_alloc(heap,pq,block_size);
+  mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE);
+  mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size);
   if (page != NULL) {
     mi_assert_internal(mi_page_immediate_available(page));
     mi_assert_internal(page->block_size == block_size);
+    mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE);
+    mi_assert_internal(_mi_page_segment(page)->used==1);
+    mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
+    page->heap = NULL;
     if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) {
       _mi_stat_increase(&heap->tld->stats.giant, block_size);
       _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1);
@@ -714,7 +720,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
       _mi_stat_increase(&heap->tld->stats.huge, block_size);
       _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1);
     }
-  }
+  }
   return page;
 }
diff --git a/src/segment.c b/src/segment.c
index b1a5221c..3be703cf 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -134,7 +134,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment) {
     if (!segment->pages[i].segment_in_use) nfree++;
   }
   mi_assert_internal(nfree + segment->used == segment->capacity);
-  mi_assert_internal(segment->thread_id == _mi_thread_id()); // or 0
+  mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0));
   mi_assert_internal(segment->page_kind == MI_PAGE_HUGE ||
                      (mi_segment_pagesize(segment) * segment->capacity == segment->segment_size));
   return true;
@@ -700,6 +700,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld,
   if (segment == NULL) return NULL;
   mi_assert_internal(segment->segment_size - segment->segment_info_size >= size);
   segment->used = 1;
+  segment->thread_id = 0; // huge pages are immediately abandoned
   mi_page_t* page = &segment->pages[0];
   page->segment_in_use = true;
   return page;
@@ -721,7 +722,7 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
   else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) {
     page = mi_segment_medium_page_alloc(tld, os_tld);
   }
-  else if (block_size < MI_LARGE_OBJ_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) {
+  else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) {
     page = mi_segment_large_page_alloc(tld, os_tld);
   }
   else {
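The ownership convention behind patch 13: a huge segment is created with `thread_id == 0` (abandoned), so whichever thread ends up freeing its single block may claim the segment and release it immediately instead of deferring to an owner thread. A simplified sketch of that hand-off (illustrative fields; a production version would claim the segment with an atomic compare-and-swap rather than a plain store):

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct seg_s {
      uintptr_t owner_tid;   /* 0 means: owned by no thread (abandoned) */
      bool      is_huge;     /* segment holds exactly one huge block    */
    } seg_t;

    static void free_huge_block(seg_t* seg, uintptr_t my_tid) {
      if (seg->is_huge && seg->owner_tid == 0) {
        seg->owner_tid = my_tid;   /* claim the abandoned segment ...          */
        /* ... and unmap the whole segment here (munmap / VirtualFree) */
        return;
      }
      /* otherwise: the normal local or cross-thread free path */
    }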
 mi_page_t* page = &segment->pages[0];
 page->segment_in_use = true;
 return page;
@@ -721,7 +722,7 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_
 else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) {
 page = mi_segment_medium_page_alloc(tld, os_tld);
 }
- else if (block_size < MI_LARGE_OBJ_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) {
+ else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) {
 page = mi_segment_large_page_alloc(tld, os_tld);
 }
 else {

From a431d80fc38639fa4186baa0a0d13063fa7dc994 Mon Sep 17 00:00:00 2001
From: daan
Date: Sun, 25 Aug 2019 10:01:11 -0700
Subject: [PATCH 14/40] better object sizes for large objects

---
 include/mimalloc-types.h | 12 +++++++-----
 src/page.c               |  5 ++++-
 src/segment.c            |  8 ++------
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index c20b663a..db39b9c4 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -91,11 +91,13 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE)
 #define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE)

-#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4)
-#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb on 64-bit
-#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2Mb on 64-bit
-#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX>>MI_INTPTR_SHIFT)
-#define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)
+// The max object sizes are checked so that no more than 12.5% is wasted internally over the page sizes.
+// (Except for large pages since huge objects are allocated in 4MiB chunks)
+#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 16kb
+#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb
+#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2mb
+#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
+#define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)

 // Minimal alignment necessary. On most platforms 16 bytes are needed
 // due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
diff --git a/src/page.c b/src/page.c
index f7e0ce2c..cb891d18 100644
--- a/src/page.c
+++ b/src/page.c
@@ -700,7 +700,10 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept {
 General allocation
 ----------------------------------------------------------- */

-// A huge page is allocated directly without being in a queue
+// A huge page is allocated directly without being in a queue.
+// Because huge pages contain just one block, and the segment contains
+// just that page, we always treat them as abandoned and any thread
+// that frees the block can free the whole page and segment directly.
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { size_t block_size = _mi_wsize_from_size(size) * sizeof(uintptr_t); mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); diff --git a/src/segment.c b/src/segment.c index 3be703cf..85a63ca4 100644 --- a/src/segment.c +++ b/src/segment.c @@ -709,17 +709,13 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld /* ----------------------------------------------------------- Page allocation and free ----------------------------------------------------------- */ -static bool mi_is_good_fit(size_t bsize, size_t size) { - // good fit if no more than 25% wasted - return (bsize > 0 && size > 0 && bsize < size && (size - (size % bsize)) < (size/4)); -} mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if (block_size <= MI_SMALL_OBJ_SIZE_MAX || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) { + if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { page = mi_segment_small_page_alloc(tld,os_tld); } - else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) { + else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { page = mi_segment_medium_page_alloc(tld, os_tld); } else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { From 7972b64a822dba03763d4fb9c28fa43765984da7 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 25 Aug 2019 11:18:16 -0700 Subject: [PATCH 15/40] resolve race on option initialization --- src/options.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index bc658ca9..8880c317 100644 --- a/src/options.c +++ b/src/options.c @@ -18,6 +18,9 @@ int mi_version(void) mi_attr_noexcept { // -------------------------------------------------------- // Options +// These can be accessed by multiple threads and may be +// concurrently initialized, but an initializing data race +// is ok since they resolve to the same value. // -------------------------------------------------------- typedef enum mi_init_e { UNINIT, // not yet initialized @@ -180,7 +183,6 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) { } static void mi_option_init(mi_option_desc_t* desc) { - desc->init = DEFAULTED; // Read option value from the environment char buf[32]; mi_strlcpy(buf, "mimalloc_", sizeof(buf)); @@ -218,7 +220,12 @@ static void mi_option_init(mi_option_desc_t* desc) { } else { _mi_warning_message("environment option mimalloc_%s has an invalid value: %s\n", desc->name, buf); + desc->init = DEFAULTED; } } } + else { + desc->init = DEFAULTED; + } + mi_assert_internal(desc->init != UNINIT); } From 038e8fd7d68f8a328c0e728b102dccb00df2da0f Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 25 Aug 2019 12:10:18 -0700 Subject: [PATCH 16/40] reduce retire size --- src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index cb891d18..54897af5 100644 --- a/src/page.c +++ b/src/page.c @@ -393,7 +393,7 @@ void _mi_page_retire(mi_page_t* page) { // is the only page left with free blocks. It is not clear // how to check this efficiently though... for now we just check // if its neighbours are almost fully used. 
- if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) {
+ if (mi_likely(page->block_size <= (MI_SMALL_SIZE_MAX/4))) {
 if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) {
 _mi_stat_counter_increase(&_mi_stats_main.page_no_retire,1);
 return; // don't retire after all

From b86c851cca62fc03233295178c2119d43e7ce486 Mon Sep 17 00:00:00 2001
From: daan
Date: Sun, 25 Aug 2019 12:29:01 -0700
Subject: [PATCH 17/40] put segment cache behind an option and disable by default

---
 include/mimalloc.h |  1 +
 src/options.c      |  1 +
 src/segment.c      | 15 ++++++++-------
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/include/mimalloc.h b/include/mimalloc.h
index 9f27e463..4f13bc1f 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -228,6 +228,7 @@ typedef enum mi_option_e {
 mi_option_eager_commit,
 mi_option_eager_region_commit,
 mi_option_large_os_pages, // implies eager commit
+ mi_option_segment_cache,
 mi_option_page_reset,
 mi_option_cache_reset,
 mi_option_reset_decommits,
diff --git a/src/options.c b/src/options.c
index b48e45ef..b30ff1c6 100644
--- a/src/options.c
+++ b/src/options.c
@@ -65,6 +65,7 @@ static mi_option_desc_t options[_mi_option_last] =
 { 1, UNINIT, MI_OPTION(eager_region_commit) },
 #endif
 { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
+ { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread
 { 0, UNINIT, MI_OPTION(page_reset) },
 { 0, UNINIT, MI_OPTION(cache_reset) },
 { 0, UNINIT, MI_OPTION(reset_decommits) } // note: cannot enable this if secure is on
diff --git a/src/segment.c b/src/segment.c
index 85a63ca4..18c06fbc 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -236,8 +236,6 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
 // The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use,
-// and no more than 4.
-#define MI_SEGMENT_CACHE_MAX (4)
 #define MI_SEGMENT_CACHE_FRACTION (8)

 // note: returned segment may be partially reset
@@ -253,15 +251,18 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t
 return segment;
 }

-static bool mi_segment_cache_full(mi_segments_tld_t* tld) {
- if (tld->cache_count < MI_SEGMENT_CACHE_MAX
- && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))
- ) { // always allow 1 element cache
+static bool mi_segment_cache_full(mi_segments_tld_t* tld)
+{
+ if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread
+ size_t max_cache = mi_option_get(mi_option_segment_cache);
+ if (tld->cache_count < max_cache
+ && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache
+ ) {
 return false;
 }
 // take the opportunity to reduce the segment cache if it is too large (now)
 // TODO: this never happens as we check against peak usage, should we use current usage instead?
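Since the new option goes through the standard initialization path, it can be inspected like any other mimalloc option; a minimal check, assuming <mimalloc.h> is on the include path and the usual MIMALLOC_<OPTION> environment naming that mi_option_init reads:

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // run as e.g.: MIMALLOC_SEGMENT_CACHE=4 ./a.out
  // the new default of 0 means freed segments go straight back to the OS
  printf("segment cache: %ld segments per thread\n",
         mi_option_get(mi_option_segment_cache));
  return 0;
}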
- while (tld->cache_count > MI_SEGMENT_CACHE_MAX ) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { + while (tld->cache_count > max_cache) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { mi_segment_t* segment = mi_segment_cache_pop(0,tld); mi_assert_internal(segment != NULL); if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld); From 6e360d34eea79e85b21352014a88c53c104c211d Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 25 Aug 2019 13:15:26 -0700 Subject: [PATCH 18/40] fix 1GB huge page flag on Linux --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index a1b6cdf3..3a9bd30c 100644 --- a/src/os.c +++ b/src/os.c @@ -353,7 +353,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro lflags |= MAP_HUGETLB; #endif #ifdef MAP_HUGE_1GB - if ((size % (uintptr_t)1 << 20) == 0) { + if ((size % ((uintptr_t)1 << 30)) == 0) { lflags |= MAP_HUGE_1GB; } else From e8664001f76981079191b22aff6dbdada135e6fa Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 25 Aug 2019 22:59:12 -0700 Subject: [PATCH 19/40] Use standard _Atomic declarations and clean up atomic operations --- include/mimalloc-atomic.h | 186 +++++++++++++++++++------------------- include/mimalloc-types.h | 11 ++- src/alloc.c | 6 +- src/memory.c | 54 +++++------ src/options.c | 2 +- src/os.c | 18 ++-- src/page.c | 13 +-- src/segment.c | 12 +-- src/stats.c | 22 ++--- 9 files changed, 165 insertions(+), 159 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index d504634c..739d0512 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -9,63 +9,98 @@ terms of the MIT license. A copy of the license can be found in the file #define MIMALLOC_ATOMIC_H // ------------------------------------------------------ -// Atomics +// Atomics +// We need to be portable between C, C++, and MSVC. // ------------------------------------------------------ -// Atomically increment a value; returns the incremented result. -static inline uintptr_t mi_atomic_increment(volatile uintptr_t* p); +#if defined(_MSC_VER) +#define _Atomic(tp) tp +#define ATOMIC_VAR_INIT(x) x +#elif defined(__cplusplus) +#include +#define _Atomic(tp) std::atomic +#else +#include +#endif -// Atomically increment a value; returns the incremented result. -static inline uint32_t mi_atomic_increment32(volatile uint32_t* p); +#define mi_atomic_cast(tp,x) (volatile _Atomic(tp)*)(x) -// Atomically decrement a value; returns the decremented result. -static inline uintptr_t mi_atomic_decrement(volatile uintptr_t* p); +// ------------------------------------------------------ +// Atomic operations specialized for mimalloc +// ------------------------------------------------------ -// Atomically add a 64-bit value; returns the added result. -static inline int64_t mi_atomic_add(volatile int64_t* p, int64_t add); +// Atomically add a 64-bit value; returns the previous value. +// Note: not using _Atomic(int64_t) as it is only used for stats. +static inline int64_t mi_atomic_add64(volatile int64_t* p, int64_t add); -// Atomically subtract a value; returns the subtracted result. -static inline uintptr_t mi_atomic_subtract(volatile uintptr_t* p, uintptr_t sub); +// Atomically add a value; returns the previous value. +static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add); -// Atomically subtract a value; returns the subtracted result. 
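As an aside, a small sketch of how the shim above is meant to be used: the same declaration compiles as C11 _Atomic, C++ std::atomic, or a plain MSVC integer, and note that mi_atomic_cas_weak takes (desired, expected), the reverse argument order of C11 atomic_compare_exchange:

#include <stdint.h>
// assumes the mimalloc-atomic.h declarations above are in scope

static volatile _Atomic(uintptr_t) flags; // = 0

static void set_flag(uintptr_t bit) {
  uintptr_t old, desired;
  do { // a weak CAS may fail spuriously, so retry in a loop
    old = mi_atomic_read_relaxed(&flags);
    desired = old | bit;
  } while (!mi_atomic_cas_weak(&flags, desired, old));
}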
-static inline uint32_t mi_atomic_subtract32(volatile uint32_t* p, uint32_t sub); +// Atomically compare and exchange a value; returns `true` if successful. May fail spuriously. +// (Note: expected and desired are in opposite order from atomic_compare_exchange) +static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); // Atomically compare and exchange a value; returns `true` if successful. -static inline bool mi_atomic_compare_exchange32(volatile uint32_t* p, uint32_t exchange, uint32_t compare); - -// Atomically compare and exchange a value; returns `true` if successful. -static inline bool mi_atomic_compare_exchange(volatile uintptr_t* p, uintptr_t exchange, uintptr_t compare); +static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); // Atomically exchange a value. -static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exchange); +static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange); // Atomically read a value -static inline uintptr_t mi_atomic_read(volatile uintptr_t* p); +static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p); // Atomically write a value -static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x); - -// Atomically read a pointer -static inline void* mi_atomic_read_ptr(volatile void** p) { - return (void*)mi_atomic_read( (volatile uintptr_t*)p ); -} +static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x); +// Yield static inline void mi_atomic_yield(void); + +// Atomically add a value; returns the previous value. +static inline uintptr_t mi_atomic_addu(volatile _Atomic(uintptr_t)* p, uintptr_t add) { + return (uintptr_t)mi_atomic_add((volatile _Atomic(intptr_t)*)p, (intptr_t)add); +} +// Atomically subtract a value; returns the previous value. +static inline uintptr_t mi_atomic_subu(volatile _Atomic(uintptr_t)* p, uintptr_t sub) { + return (uintptr_t)mi_atomic_add((volatile _Atomic(intptr_t)*)p, -((intptr_t)sub)); +} + +// Atomically increment a value; returns the incremented result. +static inline uintptr_t mi_atomic_increment(volatile _Atomic(uintptr_t)* p) { + return mi_atomic_addu(p, 1); +} + +// Atomically decrement a value; returns the decremented result. +static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) { + return mi_atomic_subu(p, 1); +} + +// Atomically read a pointer +static inline void* mi_atomic_read_ptr_relaxed(volatile _Atomic(void*) const * p) { + return (void*)mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)p); +} + // Atomically write a pointer -static inline void mi_atomic_write_ptr(volatile void** p, void* x) { - mi_atomic_write((volatile uintptr_t*)p, (uintptr_t)x ); +static inline void mi_atomic_write_ptr(volatile _Atomic(void*)* p, void* x) { + mi_atomic_write((volatile _Atomic(uintptr_t)*)p, (uintptr_t)x ); +} + +// Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously. +// (Note: expected and desired are in opposite order from atomic_compare_exchange) +static inline bool mi_atomic_cas_ptr_weak(volatile _Atomic(void*)* p, void* desired, void* expected) { + return mi_atomic_cas_weak((volatile _Atomic(uintptr_t)*)p, (uintptr_t)desired, (uintptr_t)expected); } // Atomically compare and exchange a pointer; returns `true` if successful. 
-static inline bool mi_atomic_compare_exchange_ptr(volatile void** p, void* newp, void* compare) { - return mi_atomic_compare_exchange((volatile uintptr_t*)p, (uintptr_t)newp, (uintptr_t)compare); +// (Note: expected and desired are in opposite order from atomic_compare_exchange) +static inline bool mi_atomic_cas_ptr_strong(volatile _Atomic(void*)* p, void* desired, void* expected) { + return mi_atomic_cas_strong((volatile _Atomic(uintptr_t)*)p, (uintptr_t)desired, (uintptr_t)expected); } // Atomically exchange a pointer value. -static inline void* mi_atomic_exchange_ptr(volatile void** p, void* exchange) { - return (void*)mi_atomic_exchange((volatile uintptr_t*)p, (uintptr_t)exchange); +static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exchange) { + return (void*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)p, (uintptr_t)exchange); } @@ -73,49 +108,37 @@ static inline void* mi_atomic_exchange_ptr(volatile void** p, void* exchange) { #define WIN32_LEAN_AND_MEAN #include #include -#if (MI_INTPTR_SIZE==8) +#ifdef _WIN64 typedef LONG64 msc_intptr_t; #define RC64(f) f##64 #else typedef LONG msc_intptr_t; #define RC64(f) f #endif -static inline uintptr_t mi_atomic_increment(volatile uintptr_t* p) { - return (uintptr_t)RC64(_InterlockedIncrement)((volatile msc_intptr_t*)p); +static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { + return (intptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); } -static inline uint32_t mi_atomic_increment32(volatile uint32_t* p) { - return (uint32_t)_InterlockedIncrement((volatile LONG*)p); +static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { + return (expected == RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); } -static inline uintptr_t mi_atomic_decrement(volatile uintptr_t* p) { - return (uintptr_t)RC64(_InterlockedDecrement)((volatile msc_intptr_t*)p); +static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { + return mi_atomic_cas_strong(p,desired,expected); } -static inline uintptr_t mi_atomic_subtract(volatile uintptr_t* p, uintptr_t sub) { - return (uintptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub)) - sub; -} -static inline uint32_t mi_atomic_subtract32(volatile uint32_t* p, uint32_t sub) { - return (uint32_t)_InterlockedExchangeAdd((volatile LONG*)p, -((LONG)sub)) - sub; -} -static inline bool mi_atomic_compare_exchange32(volatile uint32_t* p, uint32_t exchange, uint32_t compare) { - return ((int32_t)compare == _InterlockedCompareExchange((volatile LONG*)p, (LONG)exchange, (LONG)compare)); -} -static inline bool mi_atomic_compare_exchange(volatile uintptr_t* p, uintptr_t exchange, uintptr_t compare) { - return (compare == RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange, (msc_intptr_t)compare)); -} -static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exchange) { +static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } -static inline uintptr_t mi_atomic_read(volatile uintptr_t* p) { +static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) { return *p; } -static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x) { - *p 
= x; +static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + mi_atomic_exchange(p,x); } static inline void mi_atomic_yield(void) { YieldProcessor(); } -static inline int64_t mi_atomic_add(volatile int64_t* p, int64_t add) { - #if (MI_INTPTR_SIZE==8) - return _InterlockedExchangeAdd64(p, add) + add; +static inline int64_t mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) { + #ifdef _WIN64 + return mi_atomic_add(p,add); #else int64_t current; int64_t sum; @@ -123,62 +146,43 @@ static inline int64_t mi_atomic_add(volatile int64_t* p, int64_t add) { current = *p; sum = current + add; } while (_InterlockedCompareExchange64(p, sum, current) != current); - return sum; + return current; #endif } #else #ifdef __cplusplus -#include #define MI_USING_STD using namespace std; -#define _Atomic(tp) atomic #else -#include #define MI_USING_STD #endif -static inline uintptr_t mi_atomic_increment(volatile uintptr_t* p) { +static inline int64_t mi_atomic_add64(volatile int64_t* p, int64_t add) { MI_USING_STD - return atomic_fetch_add_explicit((volatile atomic_uintptr_t*)p, (uintptr_t)1, memory_order_relaxed) + 1; + return atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed); } -static inline uint32_t mi_atomic_increment32(volatile uint32_t* p) { +static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { MI_USING_STD - return atomic_fetch_add_explicit((volatile _Atomic(uint32_t)*)p, (uint32_t)1, memory_order_relaxed) + 1; + return atomic_fetch_add_explicit(p, add, memory_order_relaxed); } -static inline uintptr_t mi_atomic_decrement(volatile uintptr_t* p) { +static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { MI_USING_STD - return atomic_fetch_sub_explicit((volatile atomic_uintptr_t*)p, (uintptr_t)1, memory_order_relaxed) - 1; + return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_relaxed); } -static inline int64_t mi_atomic_add(volatile int64_t* p, int64_t add) { +static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { MI_USING_STD - return atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed) + add; + return atomic_compare_exchange_strong_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_relaxed); } -static inline uintptr_t mi_atomic_subtract(volatile uintptr_t* p, uintptr_t sub) { +static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { MI_USING_STD - return atomic_fetch_sub_explicit((volatile atomic_uintptr_t*)p, sub, memory_order_relaxed) - sub; + return atomic_exchange_explicit(p, exchange, memory_order_acq_rel); } -static inline uint32_t mi_atomic_subtract32(volatile uint32_t* p, uint32_t sub) { +static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p) { MI_USING_STD - return atomic_fetch_sub_explicit((volatile _Atomic(uint32_t)*)p, sub, memory_order_relaxed) - sub; + return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_relaxed); } -static inline bool mi_atomic_compare_exchange32(volatile uint32_t* p, uint32_t exchange, uint32_t compare) { +static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { MI_USING_STD - return atomic_compare_exchange_weak_explicit((volatile _Atomic(uint32_t)*)p, &compare, exchange, memory_order_release, memory_order_relaxed); -} -static inline bool 
mi_atomic_compare_exchange(volatile uintptr_t* p, uintptr_t exchange, uintptr_t compare) { - MI_USING_STD - return atomic_compare_exchange_weak_explicit((volatile atomic_uintptr_t*)p, &compare, exchange, memory_order_release, memory_order_relaxed); -} -static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exchange) { - MI_USING_STD - return atomic_exchange_explicit((volatile atomic_uintptr_t*)p, exchange, memory_order_acquire); -} -static inline uintptr_t mi_atomic_read(volatile uintptr_t* p) { - MI_USING_STD - return atomic_load_explicit((volatile atomic_uintptr_t*)p, memory_order_relaxed); -} -static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x) { - MI_USING_STD - return atomic_store_explicit((volatile atomic_uintptr_t*)p, x, memory_order_relaxed); + return atomic_store_explicit(p, x, memory_order_release); } #if defined(__cplusplus) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index db39b9c4..0b2334b8 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -10,6 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file #include // ptrdiff_t #include // uintptr_t, uint16_t, etc +#include // _Atomic // ------------------------------------------------------ // Variants @@ -177,8 +178,8 @@ typedef struct mi_page_s { size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) - volatile uintptr_t thread_freed; // at least this number of blocks are in `thread_free` - volatile mi_thread_free_t thread_free; // list of deferred free blocks freed by other threads + volatile _Atomic(uintptr_t) thread_freed; // at least this number of blocks are in `thread_free` + volatile _Atomic(mi_thread_free_t) thread_free; // list of deferred free blocks freed by other threads // less accessed info size_t block_size; // size available in each block (always `>0`) @@ -208,7 +209,7 @@ typedef enum mi_page_kind_e { typedef struct mi_segment_s { struct mi_segment_s* next; struct mi_segment_s* prev; - volatile struct mi_segment_s* abandoned_next; + volatile _Atomic(struct mi_segment_s*) abandoned_next; size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) size_t used; // count of pages in use (`used <= capacity`) size_t capacity; // count of available pages (`#free + used`) @@ -219,7 +220,7 @@ typedef struct mi_segment_s { // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). - volatile uintptr_t thread_id; // unique id of the thread owning this segment + volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment mi_page_kind_t page_kind; // kind of pages: small, large, or huge mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages } mi_segment_t; @@ -255,7 +256,7 @@ struct mi_heap_s { mi_tld_t* tld; mi_page_t* pages_free_direct[MI_SMALL_WSIZE_MAX + 2]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. 
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") - volatile mi_block_t* thread_delayed_free; + volatile _Atomic(mi_block_t*) thread_delayed_free; uintptr_t thread_id; // thread this heap belongs too uintptr_t cookie; uintptr_t random; // random number used for secure allocation diff --git a/src/alloc.c b/src/alloc.c index 76e093e7..97c5fcc4 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -144,7 +144,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_set_next(page, block, mi_tf_block(tfree)); tfreex = mi_tf_set_block(tfree,block); } - } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); + } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); if (mi_likely(!use_delayed)) { // increment the thread free count and return @@ -160,7 +160,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc do { dfree = (mi_block_t*)heap->thread_delayed_free; mi_block_set_nextx(heap->cookie,block,dfree); - } while (!mi_atomic_compare_exchange_ptr((volatile void**)&heap->thread_delayed_free, block, dfree)); + } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } // and reset the MI_DELAYED_FREEING flag @@ -168,7 +168,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc tfreex = tfree = page->thread_free; mi_assert_internal(mi_tf_delayed(tfree) == MI_NEVER_DELAYED_FREE || mi_tf_delayed(tfree) == MI_DELAYED_FREEING); if (mi_tf_delayed(tfree) != MI_NEVER_DELAYED_FREE) tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); - } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); + } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); } } diff --git a/src/memory.c b/src/memory.c index 26f87092..1ea6ee16 100644 --- a/src/memory.c +++ b/src/memory.c @@ -69,8 +69,8 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. typedef struct mem_region_s { - volatile uintptr_t map; // in-use bit per MI_SEGMENT_SIZE block - volatile void* start; // start of virtual memory area + volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block + volatile _Atomic(void*) start; // start of virtual memory area } mem_region_t; @@ -78,7 +78,7 @@ typedef struct mem_region_s { // TODO: in the future, maintain a map per NUMA node for numa aware allocation static mem_region_t regions[MI_REGION_MAX]; -static volatile size_t regions_count = 0; // allocated regions +static volatile _Atomic(uintptr_t) regions_count; // = 0; // allocated regions /* ---------------------------------------------------------------------------- @@ -106,9 +106,9 @@ static size_t mi_good_commit_size(size_t size) { // Return if a pointer points into a region reserved by us. 
 bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
 if (p==NULL) return false;
- size_t count = mi_atomic_read(&regions_count);
+ size_t count = mi_atomic_read_relaxed(&regions_count);
 for (size_t i = 0; i < count; i++) {
- uint8_t* start = (uint8_t*)mi_atomic_read_ptr(&regions[i].start);
+ uint8_t* start = (uint8_t*)mi_atomic_read_ptr_relaxed(&regions[i].start);
 if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
 }
 return false;
@@ -127,11 +127,11 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 {
 size_t mask = mi_region_block_mask(blocks,bitidx);
 mi_assert_internal(mask != 0);
- mi_assert_internal((mask & mi_atomic_read(&region->map)) == mask);
+ mi_assert_internal((mask & mi_atomic_read_relaxed(&region->map)) == mask);
 mi_assert_internal(&regions[idx] == region);

 // ensure the region is reserved
- void* start = mi_atomic_read_ptr(&region->start);
+ void* start = mi_atomic_read_ptr_relaxed(&region->start);
 if (start == NULL) {
 start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, mi_option_is_enabled(mi_option_eager_region_commit), tld);
@@ -139,13 +139,13 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 // failure to allocate from the OS! unclaim the blocks and fail
 size_t map;
 do {
- map = mi_atomic_read(&region->map);
- } while (!mi_atomic_compare_exchange(&region->map, map & ~mask, map));
+ map = mi_atomic_read_relaxed(&region->map);
+ } while (!mi_atomic_cas_weak(&region->map, map & ~mask, map));
 return false;
 }

 // set the newly allocated region
- if (mi_atomic_compare_exchange_ptr(&region->start, start, NULL)) {
+ if (mi_atomic_cas_ptr_strong(&region->start, start, NULL)) {
 // update the region count
 mi_atomic_increment(&regions_count);
 }
@@ -154,9 +154,9 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 // we assign it to a later slot instead (up to 4 tries).
 // note: we don't need to increment the region count, this will happen on another allocation
 for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
- void* s = mi_atomic_read_ptr(&regions[idx+i].start);
+ void* s = mi_atomic_read_ptr_relaxed(&regions[idx+i].start);
 if (s == NULL) { // quick test
- if (mi_atomic_compare_exchange_ptr(&regions[idx+i].start, start, s)) {
+ if (mi_atomic_cas_ptr_weak(&regions[idx+i].start, start, s)) {
 start = NULL;
 break;
 }
@@ -167,10 +167,10 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 _mi_os_free(start, MI_REGION_SIZE, tld->stats);
 }
 // and continue with the memory at our index
- start = mi_atomic_read_ptr(&region->start);
+ start = mi_atomic_read_ptr_relaxed(&region->start);
 }
 }
- mi_assert_internal(start == mi_atomic_read_ptr(&region->start));
+ mi_assert_internal(start == mi_atomic_read_ptr_relaxed(&region->start));
 mi_assert_internal(start != NULL);

 // Commit the blocks to memory
@@ -230,7 +230,7 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
 const uintptr_t mask = mi_region_block_mask(blocks, 0);
 const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;

- uintptr_t map = mi_atomic_read(&region->map);
+ uintptr_t map = mi_atomic_read_relaxed(&region->map);
 #ifdef MI_HAVE_BITSCAN
 size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible
@@ -245,9 +245,9 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
 mi_assert_internal((m >> bitidx) == mask); // no overflow?
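 // (illustrative aside, assuming mi_region_block_mask(blocks, bitidx) produces
 //  `blocks` consecutive one-bits starting at bit `bitidx`, i.e.
 //      mask = (((uintptr_t)1 << blocks) - 1) << bitidx
 //  so blocks=3, bitidx=5 gives 0b111 << 5 == 0xE0; the CAS on `map | m`
 //  below then claims three 4MiB blocks of the region in one step)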
 uintptr_t newmap = map | m;
 mi_assert_internal((newmap^map) >> bitidx == mask);
- if (!mi_atomic_compare_exchange(&region->map, newmap, map)) {
+ if (!mi_atomic_cas_strong(&region->map, newmap, map)) {
 // no success, another thread claimed concurrently.. keep going
- map = mi_atomic_read(&region->map);
+ map = mi_atomic_read_relaxed(&region->map);
 continue;
 }
 else {
@@ -281,7 +281,7 @@ static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, b
 // check if there are available blocks in the region..
 mi_assert_internal(idx < MI_REGION_MAX);
 mem_region_t* region = &regions[idx];
- uintptr_t m = mi_atomic_read(&region->map);
+ uintptr_t m = mi_atomic_read_relaxed(&region->map);
 if (m != MI_REGION_MAP_FULL) { // some bits are zero
 return mi_region_alloc_blocks(region, idx, blocks, size, commit, p, id, tld);
 }
@@ -317,7 +317,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t*
 // find a range of free blocks
 void* p = NULL;
- size_t count = mi_atomic_read(&regions_count);
+ size_t count = mi_atomic_read_relaxed(&regions_count);
 size_t idx = tld->region_idx; // start index is per-thread to reduce contention
 for (size_t visited = 0; visited < count; visited++, idx++) {
 if (idx >= count) idx = 0; // wrap around
@@ -376,8 +376,8 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
 size_t mask = mi_region_block_mask(blocks, bitidx);
 mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
 mem_region_t* region = &regions[idx];
- mi_assert_internal((mi_atomic_read(&region->map) & mask) == mask ); // claimed?
- void* start = mi_atomic_read_ptr(&region->start);
+ mi_assert_internal((mi_atomic_read_relaxed(&region->map) & mask) == mask ); // claimed?
+ void* start = mi_atomic_read_ptr_relaxed(&region->start);
 mi_assert_internal(start != NULL);
 void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
 mi_assert_internal(blocks_start == p); // not a pointer in our area?
@@ -405,9 +405,9 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
 uintptr_t map;
 uintptr_t newmap;
 do {
- map = mi_atomic_read(&region->map);
+ map = mi_atomic_read_relaxed(&region->map);
 newmap = map & ~mask;
- } while (!mi_atomic_compare_exchange(&region->map, newmap, map));
+ } while (!mi_atomic_cas_weak(&region->map, newmap, map));
 }
 }

@@ -419,17 +419,17 @@ void _mi_mem_collect(mi_stats_t* stats) {
 // free every region that has no segments in use.
 for (size_t i = 0; i < regions_count; i++) {
 mem_region_t* region = &regions[i];
- if (mi_atomic_read(&region->map) == 0 && region->start != NULL) {
+ if (mi_atomic_read_relaxed(&region->map) == 0 && region->start != NULL) {
 // if no segments used, try to claim the whole region
 uintptr_t m;
 do {
- m = mi_atomic_read(&region->map);
- } while(m == 0 && !mi_atomic_compare_exchange(&region->map, ~((uintptr_t)0), 0 ));
+ m = mi_atomic_read_relaxed(&region->map);
+ } while(m == 0 && !mi_atomic_cas_weak(&region->map, ~((uintptr_t)0), 0 ));
 if (m == 0) {
 // on success, free the whole region
 if (region->start != NULL) _mi_os_free((void*)region->start, MI_REGION_SIZE, stats);
 // and release
- region->start = 0;
+ mi_atomic_write_ptr(&region->start,NULL);
 mi_atomic_write(&region->map,0);
 }
 }
diff --git a/src/options.c b/src/options.c
index b30ff1c6..88f2503e 100644
--- a/src/options.c
+++ b/src/options.c
@@ -127,7 +127,7 @@ void mi_option_disable(mi_option_t option) {
 // Messages
 // --------------------------------------------------------
 #define MAX_ERROR_COUNT (10)
-static uintptr_t error_count = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings
+static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings

 // When overriding malloc, we may recurse into mi_vfprintf if an allocation
 // inside the C runtime causes another message.
diff --git a/src/os.c b/src/os.c
index e7ed57b5..fc9c5acc 100644
--- a/src/os.c
+++ b/src/os.c
@@ -186,11 +186,11 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
 #if (MI_INTPTR_SIZE >= 8)
 // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations
- static volatile intptr_t aligned_base = ((intptr_t)4 << 40); // starting at 4TiB
+ static volatile _Atomic(intptr_t) aligned_base = ATOMIC_VAR_INIT((intptr_t)4 << 40); // starting at 4TiB
 if (addr == NULL && try_alignment > 0 && try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE) == 0) {
- intptr_t hint = mi_atomic_add(&aligned_base, size) - size;
+ intptr_t hint = mi_atomic_add(&aligned_base, size);
 if (hint%try_alignment == 0) {
 return VirtualAlloc((void*)hint, size, flags, PAGE_READWRITE);
 }
@@ -214,11 +214,11 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
 static volatile uintptr_t large_page_try_ok = 0;
 void* p = NULL;
 if (use_large_os_page(size, try_alignment)) {
- uintptr_t try_ok = mi_atomic_read(&large_page_try_ok);
+ uintptr_t try_ok = mi_atomic_read_relaxed(&large_page_try_ok);
 if (try_ok > 0) {
 // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive.
 // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times.
- mi_atomic_compare_exchange(&large_page_try_ok, try_ok - 1, try_ok);
+ mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok);
 }
 else {
 // large OS pages must always reserve and commit.
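The large_page_try_ok counter above is a simple back-off; sketched here in isolation (the reset value of 10 after a failed allocation is an assumption, not shown in this hunk):

static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0

static bool should_try_large_pages(void) {
  uintptr_t try_ok = mi_atomic_read_relaxed(&large_page_try_ok);
  if (try_ok > 0) {
    // a recent failure: skip large pages and decrement the counter;
    // losing this CAS race is harmless since the count is only a heuristic
    mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok);
    return false;
  }
  return true; // after a failed attempt the caller would back off,
               // e.g. mi_atomic_write(&large_page_try_ok, 10);
}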
@@ -253,9 +253,9 @@ static void* mi_unix_mmapx(size_t size, size_t try_alignment, int protect_flags, void* p = NULL; #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations - static volatile intptr_t aligned_base = ((intptr_t)1 << 42); // starting at 4TiB + static volatile _Atomic(intptr_t) aligned_base = ATOMIC_VAR_INIT((intptr_t)1 << 42); // starting at 4TiB if (try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE)==0) { - intptr_t hint = mi_atomic_add(&aligned_base,size) - size; + intptr_t hint = mi_atomic_add(&aligned_base,size); if (hint%try_alignment == 0) { p = mmap((void*)hint,size,protect_flags,flags,fd,0); if (p==MAP_FAILED) p = NULL; // fall back to regular mmap @@ -291,14 +291,14 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) fd = VM_MAKE_TAG(100); #endif if (use_large_os_page(size, try_alignment)) { - static volatile uintptr_t large_page_try_ok = 0; - uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); + static volatile _Atomic(uintptr_t) large_page_try_ok = 0; + uintptr_t try_ok = mi_atomic_read_relaxed(&large_page_try_ok); if (try_ok > 0) { // If the OS is not configured for large OS pages, or the user does not have // enough permission, the `mmap` will always fail (but it might also fail for other reasons). // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times // to avoid too many failing calls to mmap. - mi_atomic_compare_exchange(&large_page_try_ok, try_ok - 1, try_ok); + mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); } else { int lflags = flags; diff --git a/src/page.c b/src/page.c index 54897af5..a95f5b51 100644 --- a/src/page.c +++ b/src/page.c @@ -49,11 +49,12 @@ static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { return count; } +/* // Start of the page available memory static inline uint8_t* mi_page_area(const mi_page_t* page) { return _mi_page_start(_mi_page_segment(page), page, NULL); } - +*/ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { size_t psize; @@ -126,7 +127,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) { } } while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal - !mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); + !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); } @@ -147,7 +148,7 @@ static void mi_page_thread_free_collect(mi_page_t* page) tfree = page->thread_free; head = mi_tf_block(tfree); tfreex = mi_tf_set_block(tfree,NULL); - } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); + } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); // return if the list is empty if (head == NULL) return; @@ -166,7 +167,7 @@ static void mi_page_thread_free_collect(mi_page_t* page) page->free = head; // update counts now - mi_atomic_subtract(&page->thread_freed, count); + mi_atomic_subu(&page->thread_freed, count); page->used -= count; } @@ -257,7 +258,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { mi_block_t* block; do { block = (mi_block_t*)heap->thread_delayed_free; - } while (block != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&heap->thread_delayed_free, NULL, block)); + } while (block != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), NULL, block)); // and free them 
all while(block != NULL) { @@ -270,7 +271,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { do { dfree = (mi_block_t*)heap->thread_delayed_free; mi_block_set_nextx(heap->cookie, block, dfree); - } while (!mi_atomic_compare_exchange_ptr((volatile void**)&heap->thread_delayed_free, block, dfree)); + } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } block = next; diff --git a/src/segment.c b/src/segment.c index 18c06fbc..9a744ea6 100644 --- a/src/segment.c +++ b/src/segment.c @@ -542,8 +542,8 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) // live blocks (reached through other threads). Such segments // are "abandoned" and will be reclaimed by other threads to // reuse their pages and/or free them eventually -static volatile mi_segment_t* abandoned = NULL; -static volatile uintptr_t abandoned_count = 0; +static volatile _Atomic(mi_segment_t*) abandoned; // = NULL; +static volatile _Atomic(uintptr_t) abandoned_count; // = 0; static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); @@ -561,9 +561,9 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { segment->thread_id = 0; mi_segment_t* next; do { - next = (mi_segment_t*)abandoned; - mi_atomic_write_ptr((volatile void**)&segment->abandoned_next, next); - } while (!mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment, next)); + next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&abandoned)); + mi_atomic_write_ptr(mi_atomic_cast(void*,&segment->abandoned_next), next); + } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), segment, next)); mi_atomic_increment(&abandoned_count); } @@ -597,7 +597,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_t* segment; do { segment = (mi_segment_t*)abandoned; - } while(segment != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, (mi_segment_t*)segment->abandoned_next, segment)); + } while(segment != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), (mi_segment_t*)segment->abandoned_next, segment)); if (segment==NULL) break; // stop early if no more segments available // got it. diff --git a/src/stats.c b/src/stats.c index 39015f94..2176ba17 100644 --- a/src/stats.c +++ b/src/stats.c @@ -38,13 +38,13 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { if (mi_is_in_main(stat)) { // add atomically (for abandoned pages) - int64_t current = mi_atomic_add(&stat->current,amount); + int64_t current = mi_atomic_add64(&stat->current,amount); if (current > stat->peak) stat->peak = stat->current; // racing.. 
it's ok if (amount > 0) { - mi_atomic_add(&stat->allocated,amount); + mi_atomic_add64(&stat->allocated,amount); } else { - mi_atomic_add(&stat->freed, -amount); + mi_atomic_add64(&stat->freed, -amount); } } else { @@ -62,8 +62,8 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { if (mi_is_in_main(stat)) { - mi_atomic_add( &stat->count, 1 ); - mi_atomic_add( &stat->total, (int64_t)amount ); + mi_atomic_add64( &stat->count, 1 ); + mi_atomic_add64( &stat->total, (int64_t)amount ); } else { stat->count++; @@ -82,16 +82,16 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { // must be thread safe as it is called from stats_merge static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { if (stat==src) return; - mi_atomic_add( &stat->allocated, src->allocated * unit); - mi_atomic_add( &stat->current, src->current * unit); - mi_atomic_add( &stat->freed, src->freed * unit); - mi_atomic_add( &stat->peak, src->peak * unit); + mi_atomic_add64( &stat->allocated, src->allocated * unit); + mi_atomic_add64( &stat->current, src->current * unit); + mi_atomic_add64( &stat->freed, src->freed * unit); + mi_atomic_add64( &stat->peak, src->peak * unit); } static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) { if (stat==src) return; - mi_atomic_add( &stat->total, src->total * unit); - mi_atomic_add( &stat->count, src->count * unit); + mi_atomic_add64( &stat->total, src->total * unit); + mi_atomic_add64( &stat->count, src->count * unit); } // must be thread safe as it is called from stats_merge From baabc775034efeb55a93c8088492933e56d8334f Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 25 Aug 2019 23:02:41 -0700 Subject: [PATCH 20/40] use proper atomic initialization macros --- src/init.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/init.c b/src/init.c index 76e586f2..290caeec 100644 --- a/src/init.c +++ b/src/init.c @@ -19,7 +19,8 @@ const mi_page_t _mi_page_empty = { 0, #endif 0, // used - NULL, 0, 0, + NULL, + ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), 0, NULL, NULL, NULL #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0) , { NULL } // padding @@ -81,7 +82,7 @@ const mi_heap_t _mi_heap_empty = { NULL, MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, - NULL, + ATOMIC_VAR_INIT(NULL), 0, 0, 0, From 2159c224151e5be1f3bcf73acefe62eef17d080f Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 25 Aug 2019 23:06:18 -0700 Subject: [PATCH 21/40] fix atomic declaration on windows --- src/os.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/os.c b/src/os.c index fc9c5acc..fb36f3fc 100644 --- a/src/os.c +++ b/src/os.c @@ -211,7 +211,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment } static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags) { - static volatile uintptr_t large_page_try_ok = 0; + static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; if (use_large_os_page(size, try_alignment)) { uintptr_t try_ok = mi_atomic_read_relaxed(&large_page_try_ok); @@ -291,7 +291,7 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) fd = VM_MAKE_TAG(100); #endif if (use_large_os_page(size, try_alignment)) { - static volatile _Atomic(uintptr_t) large_page_try_ok = 0; + static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; uintptr_t try_ok = 
mi_atomic_read_relaxed(&large_page_try_ok); if (try_ok > 0) { // If the OS is not configured for large OS pages, or the user does not have From 5c7c106d62f70db566e337abd6575021ec55f1bf Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 26 Aug 2019 08:11:15 -0700 Subject: [PATCH 22/40] strengthen some atomic operations for weak memory models --- include/mimalloc-atomic.h | 45 +++++++++++++++++++++++++++------------ src/alloc.c | 18 +++++++++------- src/memory.c | 20 ++++++++--------- src/stats.c | 4 ++-- 4 files changed, 53 insertions(+), 34 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 739d0512..3a289feb 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -30,26 +30,32 @@ terms of the MIT license. A copy of the license can be found in the file // ------------------------------------------------------ // Atomically add a 64-bit value; returns the previous value. -// Note: not using _Atomic(int64_t) as it is only used for stats. -static inline int64_t mi_atomic_add64(volatile int64_t* p, int64_t add); +// Note: not using _Atomic(int64_t) as it is only used for statistics. +static inline void mi_atomic_add64(volatile int64_t* p, int64_t add); -// Atomically add a value; returns the previous value. +// Atomically add a value; returns the previous value. Memory ordering is relaxed. static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add); -// Atomically compare and exchange a value; returns `true` if successful. May fail spuriously. +// Atomically compare and exchange a value; returns `true` if successful. +// May fail spuriously. Memory ordering as release on success, and relaxed on failure. // (Note: expected and desired are in opposite order from atomic_compare_exchange) static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); // Atomically compare and exchange a value; returns `true` if successful. +// Memory ordering is acquire-release +// (Note: expected and desired are in opposite order from atomic_compare_exchange) static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); -// Atomically exchange a value. +// Atomically exchange a value. Memory ordering is acquire-release. static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange); -// Atomically read a value +// Atomically read a value. Memory ordering is relaxed. static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p); -// Atomically write a value +// Atomically read a value. Memory ordering is acquire. +static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p); + +// Atomically write a value. Memory ordering is release. static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x); // Yield @@ -76,11 +82,16 @@ static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) { return mi_atomic_subu(p, 1); } -// Atomically read a pointer +// Atomically read a pointer; Memory order is relaxed. static inline void* mi_atomic_read_ptr_relaxed(volatile _Atomic(void*) const * p) { return (void*)mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)p); } +// Atomically read a pointer; Memory order is acquire. 
+static inline void* mi_atomic_read_ptr(volatile _Atomic(void*) const * p) { + return (void*)mi_atomic_read((const volatile _Atomic(uintptr_t)*)p); +} + // Atomically write a pointer static inline void mi_atomic_write_ptr(volatile _Atomic(void*)* p, void* x) { mi_atomic_write((volatile _Atomic(uintptr_t)*)p, (uintptr_t)x ); @@ -127,18 +138,21 @@ static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } -static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) { +static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) { return *p; } +static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) { + return mi_atomic_read(p); +} static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { mi_atomic_exchange(p,x); } static inline void mi_atomic_yield(void) { YieldProcessor(); } -static inline int64_t mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) { +static inline void mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) { #ifdef _WIN64 - return mi_atomic_add(p,add); + mi_atomic_add(p,add); #else int64_t current; int64_t sum; @@ -146,7 +160,6 @@ static inline int64_t mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) current = *p; sum = current + add; } while (_InterlockedCompareExchange64(p, sum, current) != current); - return current; #endif } @@ -156,9 +169,9 @@ static inline int64_t mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) #else #define MI_USING_STD #endif -static inline int64_t mi_atomic_add64(volatile int64_t* p, int64_t add) { +static inline void mi_atomic_add64(volatile int64_t* p, int64_t add) { MI_USING_STD - return atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed); + atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed); } static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { MI_USING_STD @@ -180,6 +193,10 @@ static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t) MI_USING_STD return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_relaxed); } +static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p) { + MI_USING_STD + return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_acquire); +} static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { MI_USING_STD return atomic_store_explicit(p, x, memory_order_release); diff --git a/src/alloc.c b/src/alloc.c index 97c5fcc4..7e89a591 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -118,22 +118,24 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_segment_t* segment = _mi_page_segment(page); if (segment->page_kind==MI_PAGE_HUGE) { // huge page segments are always abandoned and can be freed immediately - mi_assert_internal(segment->thread_id==0); - mi_assert_internal(segment->abandoned_next==NULL); + mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); + mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&segment->abandoned_next))==NULL); // claim it and free - mi_block_set_next(page, block, page->free); - page->free = block; - page->used--; mi_heap_t* heap = mi_get_default_heap(); - segment->thread_id = heap->thread_id; - 
_mi_segment_page_free(page,true,&heap->tld->segments);
+ // paranoia: if this is the last reference, the cas should always succeed
+ if (mi_atomic_cas_strong(&segment->thread_id,heap->thread_id,0)) {
+ mi_block_set_next(page, block, page->free);
+ page->free = block;
+ page->used--;
+ _mi_segment_page_free(page,true,&heap->tld->segments);
+ }
 return;
 }

 do {
 tfree = page->thread_free;
 use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE ||
- (mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == page->thread_freed+1)
+ (mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == mi_atomic_read_relaxed(&page->thread_freed)+1) // data-race but ok, just optimizes early release of the page
 );
 if (mi_unlikely(use_delayed)) {
 // unlikely: this only happens on the first concurrent free in a page that is in the full list
diff --git a/src/memory.c b/src/memory.c
index 1ea6ee16..268dc153 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -131,7 +131,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 mi_assert_internal(&regions[idx] == region);

 // ensure the region is reserved
- void* start = mi_atomic_read_ptr_relaxed(&region->start);
+ void* start = mi_atomic_read_ptr(&region->start);
 if (start == NULL) {
 start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, mi_option_is_enabled(mi_option_eager_region_commit), tld);
@@ -154,9 +154,9 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 // we assign it to a later slot instead (up to 4 tries).
 // note: we don't need to increment the region count, this will happen on another allocation
 for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
- void* s = mi_atomic_read_ptr_relaxed(&regions[idx+i].start);
+ void* s = mi_atomic_read_ptr(&regions[idx+i].start);
 if (s == NULL) { // quick test
- if (mi_atomic_cas_ptr_weak(&regions[idx+i].start, start, s)) {
+ if (mi_atomic_cas_ptr_strong(&regions[idx+i].start, start, NULL)) {
 start = NULL;
 break;
 }
@@ -167,10 +167,10 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 _mi_os_free(start, MI_REGION_SIZE, tld->stats);
 }
 // and continue with the memory at our index
- start = mi_atomic_read_ptr_relaxed(&region->start);
+ start = mi_atomic_read_ptr(&region->start);
 }
 }
- mi_assert_internal(start == mi_atomic_read_ptr_relaxed(&region->start));
+ mi_assert_internal(start == mi_atomic_read_ptr(&region->start));
 mi_assert_internal(start != NULL);

 // Commit the blocks to memory
@@ -230,7 +230,7 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
 const uintptr_t mask = mi_region_block_mask(blocks, 0);
 const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;

- uintptr_t map = mi_atomic_read_relaxed(&region->map);
+ uintptr_t map = mi_atomic_read(&region->map);
 #ifdef MI_HAVE_BITSCAN
 size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible
@@ -245,9 +245,9 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
 mi_assert_internal((m >> bitidx) == mask); // no overflow?
 uintptr_t newmap = map | m;
 mi_assert_internal((newmap^map) >> bitidx == mask);
- if (!mi_atomic_cas_strong(&region->map, newmap, map)) {
+ if (!mi_atomic_cas_weak(&region->map, newmap, map)) {
 // no success, another thread claimed concurrently.. keep going
-      map = mi_atomic_read_relaxed(&region->map);
+      map = mi_atomic_read(&region->map);
       continue;
     }
     else {
@@ -317,7 +317,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t*
   // find a range of free blocks
   void* p = NULL;
-  size_t count = mi_atomic_read_relaxed(&regions_count);
+  size_t count = mi_atomic_read(&regions_count);
   size_t idx = tld->region_idx; // start index is per-thread to reduce contention
   for (size_t visited = 0; visited < count; visited++, idx++) {
     if (idx >= count) idx = 0; // wrap around
@@ -377,7 +377,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
   mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
   mem_region_t* region = &regions[idx];
   mi_assert_internal((mi_atomic_read_relaxed(&region->map) & mask) == mask ); // claimed?
-  void* start = mi_atomic_read_ptr_relaxed(&region->start);
+  void* start = mi_atomic_read_ptr(&region->start);
   mi_assert_internal(start != NULL);
   void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
   mi_assert_internal(blocks_start == p); // not a pointer in our area?
diff --git a/src/stats.c b/src/stats.c
index 2176ba17..4dddb4bc 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -38,8 +38,8 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
   if (mi_is_in_main(stat))
   {
     // add atomically (for abandoned pages)
-    int64_t current = mi_atomic_add64(&stat->current,amount);
-    if (current > stat->peak) stat->peak = stat->current; // racing.. it's ok
+    mi_atomic_add64(&stat->current,amount);
+    if (stat->current > stat->peak) stat->peak = stat->current; // racing.. it's ok
     if (amount > 0) {
       mi_atomic_add64(&stat->allocated,amount);
     }

From 7ce9c02fd40796e4392892c0d413a0ac3462d112 Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 26 Aug 2019 08:20:26 -0700
Subject: [PATCH 23/40] make cas weak use release memory order; improve free assembly

---
 include/mimalloc-atomic.h | 2 +-
 src/alloc.c               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h
index 3a289feb..8b254d3e 100644
--- a/include/mimalloc-atomic.h
+++ b/include/mimalloc-atomic.h
@@ -179,7 +179,7 @@ static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add
 }
 static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
   MI_USING_STD
-  return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_relaxed);
+  return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_release, memory_order_relaxed);
 }
 static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
   MI_USING_STD
diff --git a/src/alloc.c b/src/alloc.c
index 7e89a591..afc181dd 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -226,7 +226,7 @@ void mi_free(void* p) mi_attr_noexcept
 #endif

   const mi_segment_t* const segment = _mi_ptr_segment(p);
-  if (segment == NULL) return;  // checks for (p==NULL)
+  if (mi_unlikely(segment == NULL)) return;  // checks for (p==NULL)

 #if (MI_DEBUG>0)
   if (mi_unlikely(!mi_is_in_heap_region(p))) {

From 2c19388bcfc08fa2acb3b4e58c569b7ff4b060e7 Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 26 Aug 2019 11:44:41 -0700
Subject: [PATCH 24/40] initialize mimalloc options at process load

---
 include/mimalloc-internal.h | 1 +
 src/init.c                  | 1 +
 src/options.c               | 7 +++++++
 3 files changed, 9 insertions(+)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index d886bcec..3889c66e 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -27,6 +27,7 @@ void _mi_error_message(const char* fmt, ...);
 void _mi_warning_message(const char* fmt, ...);
 void _mi_verbose_message(const char* fmt, ...);
 void _mi_trace_message(const char* fmt, ...);
+void _mi_options_init(void);

 // "init.c"
 extern mi_stats_t _mi_stats_main;
diff --git a/src/init.c b/src/init.c
index 290caeec..4c7fdda0 100644
--- a/src/init.c
+++ b/src/init.c
@@ -416,6 +416,7 @@ static void mi_allocator_done() {
 static void mi_process_load(void) {
   os_preloading = false;
   atexit(&mi_process_done);
+  _mi_options_init();
   mi_process_init();
   //mi_stats_reset();
   if (mi_redirected) _mi_verbose_message("malloc is redirected.\n");
diff --git a/src/options.c b/src/options.c
index 88f2503e..16c50f11 100644
--- a/src/options.c
+++ b/src/options.c
@@ -73,6 +73,13 @@ static mi_option_desc_t options[_mi_option_last] =

 static void mi_option_init(mi_option_desc_t* desc);

+void _mi_options_init(void) {
+  // called on process load
+  for(int i = 0; i < _mi_option_last; i++ ) {
+    mi_option_get((mi_option_t)i); // initialize
+  }
+}
+
 long mi_option_get(mi_option_t option) {
   mi_assert(option >= 0 && option < _mi_option_last);
   mi_option_desc_t* desc = &options[option];

From 8b06ab1e4946005e4bf8c067c33c53b2647aaf39 Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 26 Aug 2019 12:41:35 -0700
Subject: [PATCH 25/40] fix check on gigabyte alignment of huge os pages on windows

---
 src/os.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/os.c b/src/os.c
index 1c7696b4..5d1b7576 100644
--- a/src/os.c
+++ b/src/os.c
@@ -198,7 +198,7 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
 #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
   // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
-  if ((size % (uintptr_t)1 << 30) == 0 /* 1GiB multiple */
+  if ((size % ((uintptr_t)1 << 30)) == 0 /* 1GiB multiple */
     && (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0
     && (addr != NULL || try_alignment == 0 || try_alignment % _mi_os_page_size() == 0)
     && pNtAllocateVirtualMemoryEx != NULL)
@@ -217,7 +217,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
     }
     else {
       // else fall back to regular large OS pages
-      _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %lx)\n", err);
+      _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error 0x%lx)\n", err);
     }
   }
 #endif

From 3d8c331a1c3994a8727528487c956fddf81e2519 Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 26 Aug 2019 12:41:59 -0700
Subject: [PATCH 26/40] search regions always from the lowest index

---
 src/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/memory.c b/src/memory.c
index 268dc153..222b87c2 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -318,7 +318,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t*
   // find a range of free blocks
   void* p = NULL;
   size_t count = mi_atomic_read(&regions_count);
-  size_t idx = tld->region_idx; // start index is per-thread to reduce contention
+  size_t idx = 0; // tld->region_idx; // start index is per-thread to reduce contention
   for (size_t visited = 0; visited < count; visited++, idx++) {
     if (idx >= count) idx = 0; // wrap around
     if (!mi_region_try_alloc_blocks(idx, blocks, size, commit,
&p, id, tld)) return NULL; // error From f0a12699c208191afad6373a64a71c76af7bdb05 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 26 Aug 2019 12:42:25 -0700 Subject: [PATCH 27/40] remove atomic_iread --- include/mimalloc-atomic.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 9549cbc3..8b254d3e 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -114,9 +114,6 @@ static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exc return (void*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)p, (uintptr_t)exchange); } -static inline intptr_t mi_atomic_iread(volatile intptr_t* p) { - return (intptr_t)mi_atomic_read( (volatile uintptr_t*)p ); -} #ifdef _MSC_VER #define WIN32_LEAN_AND_MEAN From eea093000a30b2e069b77f803217622e3901b0b9 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 26 Aug 2019 13:47:52 -0700 Subject: [PATCH 28/40] graceful fallback for huge page allocation on Linux --- src/os.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/os.c b/src/os.c index 5d1b7576..7648c010 100644 --- a/src/os.c +++ b/src/os.c @@ -369,6 +369,13 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro if (large_only || lflags != flags) { // try large OS page allocation p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); + #ifdef MAP_HUGE_1GB + if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { + _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); + lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); + p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); + } + #endif if (large_only) return p; if (p == NULL) { mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations From db8d443ae661870af5c0815b7cdb0e3bdcb0f13b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 26 Aug 2019 22:45:26 -0700 Subject: [PATCH 29/40] track more precisely if memory is fixed or committed --- include/mimalloc-internal.h | 4 +- include/mimalloc-types.h | 11 +++- src/memory.c | 128 +++++++++++++++++++++--------------- src/options.c | 2 +- src/os.c | 98 +++++++++++++++++---------- src/segment.c | 55 ++++++++-------- 6 files changed, 176 insertions(+), 122 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 447df7df..9b3a3907 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -45,8 +45,8 @@ void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocat void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data // memory.c -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t* id, mi_os_tld_t* tld); -void* _mi_mem_alloc(size_t size, bool commit, size_t* id, mi_os_tld_t* tld); +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, size_t* id, mi_os_tld_t* tld); +void* _mi_mem_alloc(size_t size, bool commit, bool* large, size_t* id, mi_os_tld_t* tld); void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 0b2334b8..4bf51d1d 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -167,7 +167,7 @@ typedef struct mi_page_s { bool is_committed:1; // `true` if the page virtual memory is committed // layout like this to optimize access in `mi_malloc` and 
`mi_free` - uint16_t capacity; // number of blocks committed + uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (16 bits) @@ -207,7 +207,13 @@ typedef enum mi_page_kind_e { // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. typedef struct mi_segment_s { - struct mi_segment_s* next; + // memory fields + size_t memid; // id for the os-level memory manager + bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) + bool mem_is_committed; // `true` if the whole segment is eagerly committed + + // segment fields + struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` struct mi_segment_s* prev; volatile _Atomic(struct mi_segment_s*) abandoned_next; size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) @@ -216,7 +222,6 @@ typedef struct mi_segment_s { size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` - size_t memid; // id for the os-level memory manager // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). diff --git a/src/memory.c b/src/memory.c index 222b87c2..a9b87b8e 100644 --- a/src/memory.c +++ b/src/memory.c @@ -39,14 +39,14 @@ Possible issues: // Internal raw OS interface size_t _mi_os_large_page_size(); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, mi_stats_t* stats); -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld_t* tld); - +bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_commit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_unreset(void* p, size_t size, mi_stats_t* stats); +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); +bool _mi_os_is_huge_reserved(void* p); // Constants #if (MI_INTPTR_SIZE==8) @@ -66,11 +66,24 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld #define MI_REGION_MAP_FULL UINTPTR_MAX +typedef uintptr_t mi_region_info_t; + +static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) { + return ((uintptr_t)start | ((is_large?1:0) << 1) | (is_committed?1:0)); +} + +static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) { + if (is_large) *is_large = ((info&0x02) != 0); + if (is_committed) *is_committed = ((info&0x01) != 0); + return (void*)(info & ~0x03); +} + + // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per 
MI_SEGMENT_SIZE (4MiB) block.
 typedef struct mem_region_s {
-  volatile _Atomic(uintptr_t) map;   // in-use bit per MI_SEGMENT_SIZE block
-  volatile _Atomic(void*) start;     // start of virtual memory area
+  volatile _Atomic(uintptr_t) map;          // in-use bit per MI_SEGMENT_SIZE block
+  volatile _Atomic(mi_region_info_t) info;  // start of virtual memory area, and flags
 } mem_region_t;
@@ -108,7 +121,7 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
   if (p==NULL) return false;
   size_t count = mi_atomic_read_relaxed(&regions_count);
   for (size_t i = 0; i < count; i++) {
-    uint8_t* start = (uint8_t*)mi_atomic_read_ptr_relaxed(&regions[i].start);
+    uint8_t* start = (uint8_t*)mi_region_info_read( mi_atomic_read_relaxed(&regions[i].info), NULL, NULL);
     if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
   }
   return false;
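
Aside: the `mi_region_info_t` encoding above works because `start` is MI_SEGMENT_ALIGN-aligned, so its two low bits are always zero and can carry the `is_large` and `is_committed` flags. A minimal standalone sketch of the same pointer-tagging technique (the names here are illustrative, not part of mimalloc):

  #include <assert.h>
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  // pack an aligned pointer together with two flag bits in its (zero) low bits
  static uintptr_t info_pack(void* start, bool is_large, bool is_committed) {
    assert(((uintptr_t)start & 0x03) == 0);   // requires at least 4-byte alignment
    return (uintptr_t)start | ((is_large ? 1 : 0) << 1) | (is_committed ? 1 : 0);
  }

  static void* info_unpack(uintptr_t info, bool* is_large, bool* is_committed) {
    if (is_large)     *is_large     = ((info & 0x02) != 0);
    if (is_committed) *is_committed = ((info & 0x01) != 0);
    return (void*)(info & ~(uintptr_t)0x03);  // mask the flag bits off again
  }

  int main(void) {
    static _Alignas(64) char region[64];
    uintptr_t info = info_pack(region, true, false);
    bool large, committed;
    void* start = info_unpack(info, &large, &committed);
    printf("start ok: %d, large: %d, committed: %d\n", start == (void*)region, large, committed);
    return 0;
  }

Packing the flags into one word is what lets a single atomic compare-and-swap publish the region pointer and its properties together.
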
@@ -123,7 +136,7 @@ Commit from a region
 // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
 // if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
 // (not being able to claim is not considered an error so check for `p != NULL` afterwards).
-static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld)
+static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, bool commit, bool* large, void** p, size_t* id, mi_os_tld_t* tld)
 {
   size_t mask = mi_region_block_mask(blocks,bitidx);
   mi_assert_internal(mask != 0);
@@ -131,10 +144,14 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
   mi_assert_internal(&regions[idx] == region);

   // ensure the region is reserved
-  void* start = mi_atomic_read_ptr(&region->start);
-  if (start == NULL)
+  mi_region_info_t info = mi_atomic_read(&region->info);
+  if (info == 0)
   {
-    start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, mi_option_is_enabled(mi_option_eager_region_commit), tld);
+    bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit);
+    bool region_large = region_commit && *large;
+    void* start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, &region_large, tld);
+    *large = region_large;
+
     if (start == NULL) {
       // failure to allocate from the OS! unclaim the blocks and fail
       size_t map;
@@ -145,7 +162,8 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
     }

     // set the newly allocated region
-    if (mi_atomic_cas_ptr_strong(&region->start, start, NULL)) {
+    info = mi_region_info_create(start,region_large,region_commit);
+    if (mi_atomic_cas_strong(&region->info, info, 0)) {
       // update the region count
       mi_atomic_increment(&regions_count);
     }
@@ -154,12 +172,9 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
       // we assign it to a later slot instead (up to 4 tries).
       // note: we don't need to increment the region count, this will happen on another allocation
       for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
-        void* s = mi_atomic_read_ptr(&regions[idx+i].start);
-        if (s == NULL) { // quick test
-          if (mi_atomic_cas_ptr_strong(&regions[idx+i].start, start, NULL)) {
-            start = NULL;
-            break;
-          }
+        if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
+          start = NULL;
+          break;
         }
       }
       if (start != NULL) {
@@ -167,15 +182,17 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
         _mi_os_free(start, MI_REGION_SIZE, tld->stats);
       }
       // and continue with the memory at our index
-      start = mi_atomic_read_ptr(&region->start);
+      info = mi_atomic_read(&region->info);
     }
   }
-  mi_assert_internal(start == mi_atomic_read_ptr(&region->start));
-  mi_assert_internal(start != NULL);
+  mi_assert_internal(info == mi_atomic_read(&region->info));
+  mi_assert_internal(info != 0);

   // Commit the blocks to memory
+  bool region_is_committed = false;
+  void* start = mi_region_info_read(info,large,&region_is_committed);
   void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
-  if (commit && !mi_option_is_enabled(mi_option_eager_region_commit)) {
+  if (commit && !region_is_committed) {
     _mi_os_commit(blocks_start, mi_good_commit_size(size), tld->stats);  // only commit needed size (unless using large OS pages)
   }
@@ -223,7 +240,7 @@ static inline size_t mi_bsr(uintptr_t x) {
 // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
 // if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
 // (not being able to claim is not considered an error so check for `p != NULL` afterwards).
-static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld)
+static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool commit, bool* large, void** p, size_t* id, mi_os_tld_t* tld)
 {
   mi_assert_internal(p != NULL && id != NULL);
   mi_assert_internal(blocks < MI_REGION_MAP_BITS);
@@ -253,7 +270,7 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
     else {
       // success, we claimed the bits
       // now commit the block memory -- this can still fail
-      return mi_region_commit_blocks(region, idx, bitidx, blocks, size, commit, p, id, tld);
+      return mi_region_commit_blocks(region, idx, bitidx, blocks, size, commit, large, p, id, tld);
     }
   }
   else {
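
Aside: the claim step in `mi_region_alloc_blocks` is a classic lock-free bitmap reservation: build a mask of `blocks` one-bits, find a position where it fits, and publish it with a compare-and-swap, rescanning whenever another thread wins the race. A simplified single-word sketch in C11 atomics (illustrative only; mimalloc's version additionally uses bit-scan intrinsics to skip occupied runs quickly):

  #include <stdatomic.h>
  #include <stdint.h>

  // try to claim `blocks` contiguous bits in `*map`;
  // returns the starting bit index, or -1 if no free run was found
  static int claim_blocks(_Atomic(uintptr_t)* map, int blocks) {
    const int nbits = (int)(8 * sizeof(uintptr_t));
    const uintptr_t mask = (blocks >= nbits) ? ~(uintptr_t)0
                                             : (((uintptr_t)1 << blocks) - 1);
    uintptr_t m = atomic_load_explicit(map, memory_order_relaxed);
    for (;;) {
      int bitidx = -1;
      for (int i = 0; i + blocks <= nbits; i++) {
        if ((m & (mask << i)) == 0) { bitidx = i; break; }  // free run found
      }
      if (bitidx < 0) return -1;  // no room in this word
      // publish the claim; on failure another thread raced us and `m` is reloaded
      if (atomic_compare_exchange_weak_explicit(map, &m, m | (mask << bitidx),
                                                memory_order_acq_rel,
                                                memory_order_relaxed)) {
        return bitidx;
      }
    }
  }

Freeing blocks is the mirror image: atomically AND the word with the complement of the shifted mask.
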
@@ -276,14 +293,14 @@ static inline size_t mi_bsr(uintptr_t x) {
 // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
 // if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call.
 // (not being able to claim is not considered an error so check for `p != NULL` afterwards).
-static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld)
+static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, bool commit, bool* large, void** p, size_t* id, mi_os_tld_t* tld)
 {
   // check if there are available blocks in the region..
   mi_assert_internal(idx < MI_REGION_MAX);
   mem_region_t* region = &regions[idx];
   uintptr_t m = mi_atomic_read_relaxed(&region->map);
   if (m != MI_REGION_MAP_FULL) {  // some bits are zero
-    return mi_region_alloc_blocks(region, idx, blocks, size, commit, p, id, tld);
+    return mi_region_alloc_blocks(region, idx, blocks, size, commit, large, p, id, tld);
   }
   else {
     return true;  // no error, but no success either
   }
 }

/* ----------------------------------------------------------------------------
 Allocation
-----------------------------------------------------------------------------*/

 // Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
 // (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
-void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t* id, mi_os_tld_t* tld)
+void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, size_t* id, mi_os_tld_t* tld)
 {
   mi_assert_internal(id != NULL && tld != NULL);
   mi_assert_internal(size > 0);
   *id = SIZE_MAX;
+  bool default_large = false;
+  if (large==NULL) large = &default_large;  // ensure `large != NULL`

   // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`)
   if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) {
-    return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, true, tld);  // round up size
+    return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, commit, large, tld);  // round up size
   }

   // always round size to OS page size multiple (so commit/decommit go over the entire range)
@@ -318,27 +337,27 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t*
   // find a range of free blocks
   void* p = NULL;
   size_t count = mi_atomic_read(&regions_count);
-  size_t idx = 0; // tld->region_idx; // start index is per-thread to reduce contention
+  size_t idx = 0; // tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention?
   for (size_t visited = 0; visited < count; visited++, idx++) {
     if (idx >= count) idx = 0; // wrap around
-    if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, &p, id, tld)) return NULL; // error
+    if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, &p, id, tld)) return NULL; // error
     if (p != NULL) break;
   }

   if (p == NULL) {
     // no free range in existing regions -- try to extend beyond the count.. but at most 4 regions
     for (idx = count; idx < count + 4 && idx < MI_REGION_MAX; idx++) {
-      if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, &p, id, tld)) return NULL; // error
+      if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, &p, id, tld)) return NULL; // error
       if (p != NULL) break;
     }
   }

   if (p == NULL) {
     // we could not find a place to allocate, fall back to the os directly
-    p = _mi_os_alloc_aligned(size, alignment, commit, tld);
+    p = _mi_os_alloc_aligned(size, alignment, commit, large, tld);
   }
   else {
     tld->region_idx = idx;  // next start of search?
   }

   mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0);
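
Aside: the search loop above is a bounded wrap-around scan, and the commented-out alternative would start it at a per-thread index so threads tend to probe different regions first. A small sketch of the same loop shape, with hypothetical names:

  #include <stdbool.h>
  #include <stddef.h>

  // returns the index of the first region accepted by `try_claim`, or -1
  static int region_search(size_t count, size_t start_idx,
                           bool (*try_claim)(size_t idx)) {
    size_t idx = start_idx;
    for (size_t visited = 0; visited < count; visited++, idx++) {
      if (idx >= count) idx = 0;          // wrap around
      if (try_claim(idx)) return (int)idx;
    }
    return -1;  // all `count` regions visited without success
  }

Starting at 0 favors reusing low addresses (so high regions can eventually be freed), while starting at a per-thread index reduces CAS contention; the patch above chooses the former.
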
@@ -347,8 +366,8 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t*

 // Allocate `size` memory. Return non NULL on success, with a given memory `id`.
-void* _mi_mem_alloc(size_t size, bool commit, size_t* id, mi_os_tld_t* tld) {
-  return _mi_mem_alloc_aligned(size,0,commit,id,tld);
+void* _mi_mem_alloc(size_t size, bool commit, bool* large, size_t* id, mi_os_tld_t* tld) {
+  return _mi_mem_alloc_aligned(size,0,commit,large,id,tld);
 }

/* ----------------------------------------------------------------------------
@@ -377,7 +396,10 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
   mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
   mem_region_t* region = &regions[idx];
   mi_assert_internal((mi_atomic_read_relaxed(&region->map) & mask) == mask ); // claimed?
-  void* start = mi_atomic_read_ptr(&region->start);
+  mi_region_info_t info = mi_atomic_read(&region->info);
+  bool is_large;
+  bool is_eager_committed;
+  void* start = mi_region_info_read(info,&is_large,&is_eager_committed);
   mi_assert_internal(start != NULL);
   void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
   mi_assert_internal(blocks_start == p); // not a pointer in our area?
@@ -388,18 +410,13 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
   // TODO: implement delayed decommit/reset as these calls are too expensive
   // if the memory is reused soon.
   // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large
-  if (!mi_option_is_enabled(mi_option_large_os_pages)) {
-    if (mi_option_is_enabled(mi_option_eager_region_commit)) {
-      //_mi_os_reset(p, size, stats);
-    }
-    else {
-      //_mi_os_decommit(p, size, stats);
-    }
-  }
+  if (!is_large) {
+    // _mi_os_reset(p,size,stats);
+    // _mi_os_decommit(p,size,stats); // if !is_committed
+  }

-  // TODO: should we free empty regions? currently only done _mi_mem_collect.
-  // this frees up virtual address space which
-  // might be useful on 32-bit systems?
+  // TODO: should we free empty regions? currently only done in _mi_mem_collect.
+  // this frees up virtual address space which might be useful on 32-bit systems?

   // and unclaim
   uintptr_t map;
@@ -419,17 +436,20 @@ void _mi_mem_collect(mi_stats_t* stats) {
   // free every region that has no segments in use.
   for (size_t i = 0; i < regions_count; i++) {
     mem_region_t* region = &regions[i];
-    if (mi_atomic_read_relaxed(&region->map) == 0 && region->start != NULL) {
+    if (mi_atomic_read_relaxed(&region->map) == 0) {
       // if no segments used, try to claim the whole region
       uintptr_t m;
       do {
         m = mi_atomic_read_relaxed(&region->map);
       } while(m == 0 && !mi_atomic_cas_weak(&region->map, ~((uintptr_t)0), 0 ));
       if (m == 0) {
-        // on success, free the whole region
-        if (region->start != NULL) _mi_os_free((void*)region->start, MI_REGION_SIZE, stats);
+        // on success, free the whole region (unless it was huge reserved)
+        void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, NULL);
+        if (start != NULL && !_mi_os_is_huge_reserved(start)) {
+          _mi_os_free(start, MI_REGION_SIZE, stats);
+        }
         // and release
-        mi_atomic_write_ptr(&region->start,NULL);
+        mi_atomic_write(&region->info,0);
         mi_atomic_write(&region->map,0);
       }
     }
diff --git a/src/options.c b/src/options.c
index 11bb78d8..41bf33a0 100644
--- a/src/options.c
+++ b/src/options.c
@@ -58,7 +58,7 @@ static mi_option_desc_t options[_mi_option_last] =
 #endif

   // the following options are experimental and not all combinations make sense.
-  { 1, UNINIT, MI_OPTION(eager_commit) },        // note: if eager_region_commit is on, this should be on too.
+  { 1, UNINIT, MI_OPTION(eager_commit) },        // note: needs to be on when eager_region_commit is enabled
 #ifdef _WIN32   // and BSD?
  { 1, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...)
 #else
diff --git a/src/os.c b/src/os.c
index 7648c010..e1306722 100644
--- a/src/os.c
+++ b/src/os.c
@@ -35,10 +35,9 @@ terms of the MIT license. A copy of the license can be found in the file
   On windows initializes support for aligned allocation and
   large OS pages (if MIMALLOC_LARGE_OS_PAGES is true).
----------------------------------------------------------- */
-bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
-
-static bool mi_os_is_huge_reserved(void* p);
-static void* mi_os_alloc_from_huge_reserved(size_t size, size_t try_alignment, bool commit);
+bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
+bool _mi_os_is_huge_reserved(void* p);
+static void* mi_os_alloc_from_huge_reserved(size_t size, size_t try_alignment, bool commit);

 static void* mi_align_up_ptr(void* p, size_t alignment) {
   return (void*)_mi_align_up((uintptr_t)p, alignment);
@@ -173,7 +172,7 @@ void _mi_os_init() {

 static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
 {
-  if (addr == NULL || size == 0 || mi_os_is_huge_reserved(addr)) return true;
+  if (addr == NULL || size == 0 || _mi_os_is_huge_reserved(addr)) return true;
   bool err = false;
 #if defined(_WIN32)
   err = (VirtualFree(addr, 0, MEM_RELEASE) == 0);
@@ -199,7 +198,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
 #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
   // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
   if ((size % ((uintptr_t)1 << 30)) == 0 /* 1GiB multiple */
-    && (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0
+    && (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0
     && (addr != NULL || try_alignment == 0 || try_alignment % _mi_os_page_size() == 0)
     && pNtAllocateVirtualMemoryEx != NULL)
@@ -211,7 +210,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
     param.ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE;
     SIZE_T psize = size;
     void* base = addr;
-    NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags | MEM_RESERVE, PAGE_READWRITE, &param, 1);
+    NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, &param, 1);
     if (err == 0) {
       return base;
     }
@@ -247,10 +246,12 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
   return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
 }

-static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only) {
+static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) {
+  mi_assert_internal(!(large_only && !allow_large));
   static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0;
   void* p = NULL;
-  if (large_only || use_large_os_page(size, try_alignment)) {
+  if ((large_only || use_large_os_page(size, try_alignment))
+      && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) {
     uintptr_t try_ok = mi_atomic_read(&large_page_try_ok);
     if (!large_only && try_ok > 0) {
       // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive.
@@ -259,7 +260,8 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
     }
     else {
       // large OS pages must always reserve and commit.
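
Aside: the comment above is the key Windows constraint -- MEM_LARGE_PAGES is only valid when the range is reserved and committed in one call, and it additionally needs the SeLockMemoryPrivilege. A minimal standalone sketch of the try-large-then-fall-back pattern (assumes `size` is a multiple of GetLargePageMinimum(); this is not mimalloc's actual code):

  #include <windows.h>
  #include <stdbool.h>

  // try a large-page allocation first, then fall back to regular pages
  static void* alloc_maybe_large(SIZE_T size, bool* is_large) {
    void* p = VirtualAlloc(NULL, size,
                           MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
                           PAGE_READWRITE);
    *is_large = (p != NULL);
    if (p == NULL) {
      // large pages need privilege and contiguous physical memory;
      // degrade gracefully to normal pages on failure
      p = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
    }
    return p;
  }
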
-      p = mi_win_virtual_allocx(addr, size, try_alignment, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE | flags);
+      *is_large = true;
+      p = mi_win_virtual_allocx(addr, size, try_alignment, flags | MEM_LARGE_PAGES);
       if (large_only) return p;
       // fall back to non-large page allocation on error (`p == NULL`).
       if (p == NULL) {
@@ -268,6 +270,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
     }
   }
   if (p == NULL) {
+    *is_large = ((flags&MEM_LARGE_PAGES) != 0);
     p = mi_win_virtual_allocx(addr, size, try_alignment, flags);
   }
   if (p == NULL) {
@@ -311,7 +314,7 @@ static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int pr
   return p;
 }

-static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only) {
+static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) {
   void* p = NULL;
 #if !defined(MAP_ANONYMOUS)
 #define MAP_ANONYMOUS MAP_ANON
@@ -333,7 +336,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
     // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99)
     fd = VM_MAKE_TAG(100);
 #endif
-  if (large_only || use_large_os_page(size, try_alignment)) {
+  if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) {
     static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0;
     uintptr_t try_ok = mi_atomic_read(&large_page_try_ok);
     if (!large_only && try_ok > 0) {
@@ -368,6 +371,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
 #endif
       if (large_only || lflags != flags) {
         // try large OS page allocation
+        *is_large = true;
         p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd);
@@ -384,6 +388,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
     }
   }
   if (p == NULL) {
+    *is_large = false;
     p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd);
 #if defined(MADV_HUGEPAGE)
     // Many Linux systems don't allow MAP_HUGETLB but they support instead
     // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE
     // though since properly aligned allocations will already use large pages if available
     // in that case -- in particular for our large regions (in `memory.c`).
     // However, some systems only allow THP if called with explicit `madvise`, so
     // when large OS pages are enabled for mimalloc, we call `madvise` anyways.
-    if (use_large_os_page(size, try_alignment)) {
-      madvise(p, size, MADV_HUGEPAGE);
+    if (allow_large && use_large_os_page(size, try_alignment)) {
+      if (madvise(p, size, MADV_HUGEPAGE) == 0) {
+        *is_large = true; // possibly
+      };
     }
 #endif
   }

// Primitive allocation from the OS.
// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
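
Aside: the same graceful degradation shown standalone for Linux: ask for 1GiB pages, retry with 2MiB pages, then fall back to normal pages. A sketch under the assumption that `size` is a multiple of the requested page size and that hugetlb pages are reserved on the system (e.g. via /proc/sys/vm/nr_hugepages):

  #include <sys/mman.h>

  #ifndef MAP_HUGE_SHIFT
  #define MAP_HUGE_SHIFT 26
  #endif
  #ifndef MAP_HUGE_2MB
  #define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)   // log2(2MiB) encoded in the flags
  #endif
  #ifndef MAP_HUGE_1GB
  #define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)   // log2(1GiB) encoded in the flags
  #endif

  // try 1GiB pages, then 2MiB pages, then regular pages (Linux-specific)
  static void* mmap_huge(size_t size) {
    const int prot = PROT_READ | PROT_WRITE;
    const int base = MAP_PRIVATE | MAP_ANONYMOUS;
    void* p = mmap(NULL, size, prot, base | MAP_HUGETLB | MAP_HUGE_1GB, -1, 0);
    if (p == MAP_FAILED) p = mmap(NULL, size, prot, base | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0);
    if (p == MAP_FAILED) p = mmap(NULL, size, prot, base, -1, 0);  // regular pages
    return (p == MAP_FAILED ? NULL : p);
  }
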
-static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, mi_stats_t* stats) { +static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); if (size == 0) return NULL; + if (!commit) allow_large = false; - void* p = mi_os_alloc_from_huge_reserved(size, try_alignment, commit); - if (p != NULL) return p; + void* p = NULL; + if (allow_large) { + p = mi_os_alloc_from_huge_reserved(size, try_alignment, commit); + if (p != NULL) { + *is_large = true; + return p; + } + } #if defined(_WIN32) int flags = MEM_RESERVE; if (commit) flags |= MEM_COMMIT; - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false); + p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); #elif defined(__wasi__) + *is_large = false; p = mi_wasm_heap_grow(size, try_alignment); #else int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false); + p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); #endif _mi_stat_increase(&stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); - if (commit) _mi_stat_increase(&stats->committed, size); + if (commit) { _mi_stat_increase(&stats->committed, size); } } return p; } @@ -431,14 +446,15 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, mi_ // Primitive aligned allocation from the OS. // This function guarantees the allocated memory is aligned. -static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, mi_stats_t* stats) { +static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + if (!commit) allow_large = false; if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; size = _mi_align_up(size, _mi_os_page_size()); // try first with a hint (this will be aligned directly on Win 10+ or BSD) - void* p = mi_os_mem_alloc(size, alignment, commit, stats); + void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats); if (p == NULL) return NULL; // if not aligned, free it, overallocate, and unmap around it @@ -457,7 +473,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, if (commit) flags |= MEM_COMMIT; for (int tries = 0; tries < 3; tries++) { // over-allocate to determine a virtual memory range - p = mi_os_mem_alloc(over_size, alignment, commit, stats); + p = mi_os_mem_alloc(over_size, alignment, commit, false, is_large, stats); if (p == NULL) return NULL; // error if (((uintptr_t)p % alignment) == 0) { // if p happens to be aligned, just decommit the left-over area @@ -468,7 +484,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // otherwise free and allocate at an aligned address in there mi_os_mem_free(p, over_size, stats); void* aligned_p = mi_align_up_ptr(p, alignment); - p = mi_win_virtual_alloc(aligned_p, size, alignment, flags, false); + p = mi_win_virtual_alloc(aligned_p, size, alignment, flags, false, allow_large, is_large); if (p == aligned_p) break; // success! if (p != NULL) { // should not happen? 
mi_os_mem_free(p, size, stats); @@ -478,7 +494,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, } #else // overallocate... - p = mi_os_mem_alloc(over_size, alignment, commit, stats); + p = mi_os_mem_alloc(over_size, alignment, commit, false, is_large, stats); if (p == NULL) return NULL; // and selectively unmap parts around the over-allocated area. void* aligned_p = mi_align_up_ptr(p, alignment); @@ -504,7 +520,8 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, void* _mi_os_alloc(size_t size, mi_stats_t* stats) { if (size == 0) return NULL; size = mi_os_good_alloc_size(size, 0); - return mi_os_mem_alloc(size, 0, true, stats); + bool is_large = false; + return mi_os_mem_alloc(size, 0, true, false, &is_large, stats); } void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { @@ -513,12 +530,17 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { mi_os_mem_free(p, size, stats); } -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld_t* tld) +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld) { if (size == 0) return NULL; size = mi_os_good_alloc_size(size, alignment); alignment = _mi_align_up(alignment, _mi_os_page_size()); - return mi_os_mem_alloc_aligned(size, alignment, commit, tld->stats); + bool allow_large = false; + if (large != NULL) { + allow_large = *large; + *large = false; + } + return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), tld->stats); } @@ -559,7 +581,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ // page align in the range, commit liberally, decommit conservative size_t csize; void* start = mi_os_page_align_areax(conservative, addr, size, &csize); - if (csize == 0 || mi_os_is_huge_reserved(addr)) return true; + if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; int err = 0; if (commit) { _mi_stat_increase(&stats->committed, csize); @@ -611,7 +633,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // page align conservatively within the range size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); - if (csize == 0 || mi_os_is_huge_reserved(addr)) return true; + if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; if (reset) _mi_stat_increase(&stats->reset, csize); else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! 
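
Aside: "reset" here means telling the OS that the pages' contents are disposable without unmapping them. On Windows that is MEM_RESET (used in the next hunk); on Unix the closest equivalents are the madvise hints, preferring MADV_FREE (lazy and cheap) with MADV_DONTNEED as the fallback (drops pages immediately; the next access faults in zeros). A minimal sketch of that preference order:

  #include <stddef.h>
  #include <sys/mman.h>

  // hint that [p, p+size) no longer needs its contents; the mapping stays valid
  static int os_reset(void* p, size_t size) {
  #if defined(MADV_FREE)
    if (madvise(p, size, MADV_FREE) == 0) return 0;  // pages reclaimed lazily
  #endif
    // immediate release; subsequent reads return zero-filled pages
    return madvise(p, size, MADV_DONTNEED);
  }
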
@@ -626,6 +648,11 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
   // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory
   void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE);
   mi_assert_internal(p == start);
+  #if 0
+  if (p == start) {
+    VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set
+  }
+  #endif
   if (p != start) return false;
 #else
 #if defined(MADV_FREE)
@@ -679,8 +706,8 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) {
   size_t csize = 0;
   void* start = mi_os_page_align_area_conservative(addr, size, &csize);
   if (csize == 0) return false;
-  if (mi_os_is_huge_reserved(addr)) {
-    _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
+  if (_mi_os_is_huge_reserved(addr)) {
+    _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
   }
   int err = 0;
 #ifdef _WIN32
@@ -742,7 +769,7 @@ typedef struct mi_huge_info_s {
 static mi_huge_info_t os_huge_reserved = { NULL, 0, ATOMIC_VAR_INIT(0) };

-static bool mi_os_is_huge_reserved(void* p) {
+bool _mi_os_is_huge_reserved(void* p) {
   return (mi_atomic_read_ptr(&os_huge_reserved.start) != NULL &&
           p >= mi_atomic_read_ptr(&os_huge_reserved.start) &&
           (uint8_t*)p < (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + mi_atomic_read(&os_huge_reserved.reserved));
@@ -806,10 +833,11 @@ int mi_reserve_huge_os_pages( size_t pages, double max_secs ) mi_attr_noexcept
   for (size_t page = 0; page < pages; page++, addr += MI_HUGE_OS_PAGE_SIZE ) {
     // allocate large pages
     void* p = NULL;
+    bool is_large = true;
 #ifdef _WIN32
-    p = mi_win_virtual_alloc(addr, MI_HUGE_OS_PAGE_SIZE, 0, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE, true);
+    p = mi_win_virtual_alloc(addr, MI_HUGE_OS_PAGE_SIZE, 0, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE, true, true, &is_large);
 #elif defined(MI_OS_USE_MMAP)
-    p = mi_unix_mmap(addr, MI_HUGE_OS_PAGE_SIZE, 0, PROT_READ | PROT_WRITE, true);
+    p = mi_unix_mmap(addr, MI_HUGE_OS_PAGE_SIZE, 0, PROT_READ | PROT_WRITE, true, true, &is_large);
 #else
     // always fail
 #endif
diff --git a/src/segment.c b/src/segment.c
index 9a744ea6..020d53e8 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -229,6 +229,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
   segment->thread_id = 0;
   mi_segments_track_size(-((long)segment_size),tld);
   if (mi_option_is_enabled(mi_option_secure)) {
+    mi_assert_internal(!segment->mem_is_fixed);
     _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set
   }
   _mi_mem_free(segment, segment_size, segment->memid, tld->stats);
@@ -277,7 +278,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld)
     return false;
   }
   mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE);
-  if (mi_option_is_enabled(mi_option_cache_reset)) {
+  if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) {
     _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats);
   }
   segment->next = tld->cache;
@@ -325,11 +326,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
   size_t page_size = (page_kind == MI_PAGE_HUGE ?
segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM); + bool eager = mi_option_is_enabled(mi_option_eager_commit); + bool commit = eager || (page_kind > MI_PAGE_MEDIUM); bool protection_still_good = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { if (mi_option_is_enabled(mi_option_secure)) { + mi_assert_internal(!segment->mem_is_fixed); if (segment->page_kind != page_kind) { _mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs } @@ -337,37 +340,38 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, protection_still_good = true; // otherwise, the guard pages are still in place } } - if (!mi_option_is_enabled(mi_option_eager_commit)) { - if (page_kind > MI_PAGE_MEDIUM) { - _mi_mem_commit(segment, segment->segment_size, tld->stats); - } - else { - // ok, commit (and unreset) on demand again - } + if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) { + mi_assert_internal(!segment->mem_is_fixed); + _mi_mem_commit(segment, segment->segment_size, tld->stats); + segment->mem_is_committed = true; } - else if (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset)) { + if (!segment->mem_is_fixed && + (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) { _mi_mem_unreset(segment, segment->segment_size, tld->stats); } } else { // Allocate the segment from the OS size_t memid; - segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &memid, os_tld); + bool mem_large = (eager && !mi_option_is_enabled(mi_option_secure)); // only allow large OS pages once we are no longer lazy + segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { + // ensure the initial info is committed _mi_mem_commit(segment, info_size, tld->stats); } segment->memid = memid; + segment->mem_is_fixed = mem_large; + segment->mem_is_committed = commit; mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - // zero the segment info - { size_t memid = segment->memid; - memset(segment, 0, info_size); - segment->memid = memid; - } + // zero the segment info (but not the `mem` fields) + ptrdiff_t ofs = offsetof(mi_segment_t,next); + memset((uint8_t*)segment + ofs, 0, info_size - ofs); + // guard pages if (mi_option_is_enabled(mi_option_secure) && !protection_still_good) { // in secure mode, we set up a protected page in between the segment info // and the page data @@ -386,6 +390,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } } + // initialize segment->page_kind = page_kind; segment->capacity = capacity; segment->page_shift = page_shift; @@ -453,13 +458,14 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) if (!page->segment_in_use) { if (page->is_reset || !page->is_committed) { size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - mi_assert_internal(!(page->is_reset && !page->is_committed)); + uint8_t* start = _mi_page_start(segment, page, &psize); if (!page->is_committed) { + mi_assert_internal(!segment->mem_is_fixed); page->is_committed = true; 
_mi_mem_commit(start,psize,stats); } if (page->is_reset) { + mi_assert_internal(!segment->mem_is_fixed); page->is_reset = false; _mi_mem_unreset(start, psize, stats); } @@ -488,22 +494,17 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_sta _mi_stat_decrease(&stats->pages, 1); // reset the page memory to reduce memory pressure? - if (!page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { + if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; _mi_mem_reset(start, psize, stats); } - // zero the page data - uint8_t idx = page->segment_idx; // don't clear the index - bool is_reset = page->is_reset; // don't clear the reset flag - bool is_committed = page->is_committed; // don't clear the commit flag - memset(page, 0, sizeof(*page)); - page->segment_idx = idx; + // zero the page data, but not the segment fields + ptrdiff_t ofs = offsetof(mi_page_t,capacity); + memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); page->segment_in_use = false; - page->is_reset = is_reset; - page->is_committed = is_committed; segment->used--; } From b72a2d9659216dcf352a69287f39fec7798d305d Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 27 Aug 2019 18:43:50 +0100 Subject: [PATCH 30/40] macOS: anonymous page ID make it as env var. --- include/mimalloc.h | 1 + src/options.c | 3 ++- src/os.c | 4 +++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 4f13bc1f..41514d3e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -232,6 +232,7 @@ typedef enum mi_option_e { mi_option_page_reset, mi_option_cache_reset, mi_option_reset_decommits, + mi_option_os_tag, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 16c50f11..5f2ad896 100644 --- a/src/options.c +++ b/src/options.c @@ -68,7 +68,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(cache_reset) }, - { 0, UNINIT, MI_OPTION(reset_decommits) } // note: cannot enable this if secure is on + { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on + { 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index fb36f3fc..0a733aa3 100644 --- a/src/os.c +++ b/src/os.c @@ -288,7 +288,9 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) #endif #if defined(VM_MAKE_TAG) // macOS: tracking anonymous page with a specific ID. 
(All up to 98 are taken officially but LLVM sanitizers had taken 99)
-  fd = VM_MAKE_TAG(100);
+  int os_tag = (int)mi_option_get(mi_option_os_tag);
+  if (os_tag < 100 || os_tag > 255) os_tag = 100;
+  fd = VM_MAKE_TAG(os_tag);
 #endif
   if (use_large_os_page(size, try_alignment)) {
     static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0;
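
Aside: the os_tag option feeds into Mach's VM tagging: on macOS, the fd argument of an anonymous mmap may carry VM_MAKE_TAG(tag), and tools such as vmmap then group the allocator's memory under that tag. A standalone sketch of a tagged anonymous mapping (the env-var name MIMALLOC_OS_TAG below follows mimalloc's usual MIMALLOC_ option naming and is an assumption here):

  #include <sys/mman.h>
  #include <mach/vm_statistics.h>

  // anonymous mapping labeled with a user tag; mimalloc clamps tags to 100..255
  static void* mmap_tagged(size_t size, int tag) {
    if (tag < 100 || tag > 255) tag = 100;  // same clamp as in the patch above
    return mmap(NULL, size, PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANON, VM_MAKE_TAG(tag), 0);
  }

With the option in place, something like `MIMALLOC_OS_TAG=240 ./app` should make the heap show up under tag 240 in vmmap output.
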
From 18e02c3766d9fd8fc47fef2f0346645487d967ff Mon Sep 17 00:00:00 2001
From: daan
Date: Tue, 27 Aug 2019 17:02:56 -0700
Subject: [PATCH 31/40] try allocating non-eager segments in non-fixed memory

---
 src/memory.c  | 24 ++++++++++++++++++------
 src/options.c |  2 +-
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/memory.c b/src/memory.c
index a9b87b8e..f53b1ec3 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -262,7 +262,7 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
     mi_assert_internal((m >> bitidx) == mask); // no overflow?
     uintptr_t newmap = map | m;
     mi_assert_internal((newmap^map) >> bitidx == mask);
-    if (!mi_atomic_cas_weak(&region->map, newmap, map)) {
+    if (!mi_atomic_cas_weak(&region->map, newmap, map)) {  // TODO: use strong cas here?
       // no success, another thread claimed concurrently.. keep going
       map = mi_atomic_read(&region->map);
       continue;
     }
     else {
@@ -299,12 +299,24 @@ static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, b
   mi_assert_internal(idx < MI_REGION_MAX);
   mem_region_t* region = &regions[idx];
   uintptr_t m = mi_atomic_read_relaxed(&region->map);
-  if (m != MI_REGION_MAP_FULL) {  // some bits are zero
-    return mi_region_alloc_blocks(region, idx, blocks, size, commit, large, p, id, tld);
-  }
-  else {
-    return true; // no error, but no success either
+  if (m != MI_REGION_MAP_FULL) {  // some bits are zero
+    bool ok = (commit || *large); // committing or allow-large is always ok
+    if (!ok) {
+      // otherwise skip incompatible regions if possible.
+      // this is not guaranteed due to multiple threads allocating at the same time but
+      // that's ok. In secure mode, large is never allowed so that works out; otherwise
+      // we might just not be able to reset/decommit individual pages sometimes.
+      mi_region_info_t info = mi_atomic_read_relaxed(&region->info);
+      bool is_large;
+      bool is_committed;
+      void* start = mi_region_info_read(info,&is_large,&is_committed);
+      ok = (start == NULL || (commit || !is_committed) || (*large || !is_large)); // Todo: test with one bitmap operation?
+    }
+    if (ok) {
+      return mi_region_alloc_blocks(region, idx, blocks, size, commit, large, p, id, tld);
+    }
   }
+  return true; // no error, but no success either
 }

diff --git a/src/options.c b/src/options.c
index 1d030830..1076ce1e 100644
--- a/src/options.c
+++ b/src/options.c
@@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] =
   // the following options are experimental and not all combinations make sense.
   { 1, UNINIT, MI_OPTION(eager_commit) },        // note: needs to be on when eager_region_commit is enabled
 #ifdef _WIN32   // and BSD?
-  { 1, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...)
+  { 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...)
 #else
   { 1, UNINIT, MI_OPTION(eager_region_commit) },
 #endif

From a551f3abc470695335bf7368012fe401260bde56 Mon Sep 17 00:00:00 2001
From: daan
Date: Tue, 27 Aug 2019 18:08:03 -0700
Subject: [PATCH 32/40] more precise commit statistics

---
 src/memory.c | 12 +++++++++---
 src/os.c     | 26 +++++++++++++++-----------
 src/stats.c  |  1 +
 3 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/src/memory.c b/src/memory.c
index f53b1ec3..3a465d86 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -47,6 +47,7 @@ bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
 bool _mi_os_unreset(void* p, size_t size, mi_stats_t* stats);
 void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
 bool _mi_os_is_huge_reserved(void* p);
+void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);

 // Constants
 #if (MI_INTPTR_SIZE==8)
@@ -179,7 +180,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
     }
     if (start != NULL) {
       // free it if we didn't succeed to save it to some other region
-      _mi_os_free(start, MI_REGION_SIZE, tld->stats);
+      _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats);
     }
     // and continue with the memory at our index
     info = mi_atomic_read(&region->info);
@@ -426,6 +427,10 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
     // _mi_os_reset(p,size,stats);
     // _mi_os_decommit(p,size,stats); // if !is_committed
   }
+  if (!is_eager_committed) {
+    // adjust commit statistics as we commit again when re-using the same slot
+    _mi_stat_decrease(&stats->committed, mi_good_commit_size(size));
+  }

   // TODO: should we free empty regions? currently only done _mi_mem_collect.
   // this frees up virtual address space which might be useful on 32-bit systems?

   // and unclaim
   uintptr_t map;
@@ -456,9 +461,10 @@ void _mi_mem_collect(mi_stats_t* stats) {
       } while(m == 0 && !mi_atomic_cas_weak(&region->map, ~((uintptr_t)0), 0 ));
       if (m == 0) {
         // on success, free the whole region (unless it was huge reserved)
-        void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, NULL);
+        bool is_eager_committed;
+        void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, &is_eager_committed);
         if (start != NULL && !_mi_os_is_huge_reserved(start)) {
-          _mi_os_free(start, MI_REGION_SIZE, stats);
+          _mi_os_free_ex(start, MI_REGION_SIZE, is_eager_committed, stats);
         }
         // and release
         mi_atomic_write(&region->info,0);
         mi_atomic_write(&region->map,0);
diff --git a/src/os.c b/src/os.c
index 566d204d..76778123 100644
--- a/src/os.c
+++ b/src/os.c
@@ -170,7 +170,7 @@ void _mi_os_init() {
   Raw allocation on Windows (VirtualAlloc) and Unix's (mmap).
----------------------------------------------------------- */

-static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
+static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats)
 {
   if (addr == NULL || size == 0 || _mi_os_is_huge_reserved(addr)) return true;
   bool err = false;
@@ -181,7 +181,7 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
 #else
   err = (munmap(addr, size) == -1);
 #endif
-  _mi_stat_decrease(&stats->committed, size); // TODO: what if never committed?
+ if (was_committed) _mi_stat_decrease(&stats->committed, size); _mi_stat_decrease(&stats->reserved, size); if (err) { #pragma warning(suppress:4996) @@ -461,7 +461,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // if not aligned, free it, overallocate, and unmap around it if (((uintptr_t)p % alignment != 0)) { - mi_os_mem_free(p, size, stats); + mi_os_mem_free(p, size, commit, stats); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow size_t over_size = size + alignment; @@ -484,12 +484,12 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, } else { // otherwise free and allocate at an aligned address in there - mi_os_mem_free(p, over_size, stats); + mi_os_mem_free(p, over_size, commit, stats); void* aligned_p = mi_align_up_ptr(p, alignment); p = mi_win_virtual_alloc(aligned_p, size, alignment, flags, false, allow_large, is_large); if (p == aligned_p) break; // success! if (p != NULL) { // should not happen? - mi_os_mem_free(p, size, stats); + mi_os_mem_free(p, size, commit, stats); p = NULL; } } @@ -504,8 +504,8 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t mid_size = _mi_align_up(size, _mi_os_page_size()); size_t post_size = over_size - pre_size - mid_size; mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size); - if (pre_size > 0) mi_os_mem_free(p, pre_size, stats); - if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, stats); + if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); + if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); // we can return the aligned pointer on `mmap` systems p = aligned_p; #endif @@ -526,10 +526,14 @@ void* _mi_os_alloc(size_t size, mi_stats_t* stats) { return mi_os_mem_alloc(size, 0, true, false, &is_large, stats); } -void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { +void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats) { if (size == 0 || p == NULL) return; size = mi_os_good_alloc_size(size, 0); - mi_os_mem_free(p, size, stats); + mi_os_mem_free(p, size, was_committed, stats); +} + +void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { + _mi_os_free_ex(p, size, true, stats); } void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld) @@ -650,7 +654,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); - #if 0 + #if 1 if (p == start) { VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set } @@ -753,7 +757,7 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { // we cannot shrink on windows, but we can decommit return _mi_os_decommit(start, size, stats); #else - return mi_os_mem_free(start, size, stats); + return mi_os_mem_free(start, size, true, stats); #endif } diff --git a/src/stats.c b/src/stats.c index 075234b8..292bc84b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -85,6 +85,7 @@ static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64 mi_atomic_add64( &stat->allocated, src->allocated * unit); mi_atomic_add64( &stat->current, src->current * unit); mi_atomic_add64( &stat->freed, src->freed * unit); + // peak scores 
do not work across threads.. mi_atomic_add64( &stat->peak, src->peak * unit); } From 9af51506a65f3597ca2b183ebcde1491e3271ed8 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 28 Aug 2019 11:58:30 -0700 Subject: [PATCH 33/40] track commit and is_large status more precisely --- include/mimalloc-internal.h | 3 +- include/mimalloc.h | 2 ++ src/memory.c | 65 +++++++++++++++++++++++-------------- src/options.c | 2 ++ src/os.c | 15 ++++----- src/segment.c | 8 ++--- 6 files changed, 57 insertions(+), 38 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 9b3a3907..97619765 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -45,8 +45,7 @@ void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocat void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data // memory.c -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, size_t* id, mi_os_tld_t* tld); -void* _mi_mem_alloc(size_t size, bool commit, bool* large, size_t* id, mi_os_tld_t* tld); +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, size_t* id, mi_os_tld_t* tld); void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats); diff --git a/include/mimalloc.h b/include/mimalloc.h index 0357d633..5cec05fa 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -225,11 +225,13 @@ typedef enum mi_option_e { mi_option_verbose, // the following options are experimental mi_option_secure, + mi_option_lazy_commit, mi_option_eager_commit, mi_option_eager_region_commit, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_segment_cache, + mi_option_segment_reset, mi_option_page_reset, mi_option_cache_reset, mi_option_reset_decommits, diff --git a/src/memory.c b/src/memory.c index 3a465d86..d8cb204e 100644 --- a/src/memory.c +++ b/src/memory.c @@ -46,8 +46,9 @@ bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -bool _mi_os_is_huge_reserved(void* p); void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); +void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); +bool _mi_os_is_huge_reserved(void* p); // Constants #if (MI_INTPTR_SIZE==8) @@ -137,7 +138,7 @@ Commit from a region // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written // if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). 
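Throughout this patch, `commit` becomes an in/out parameter: the caller passes in whether it wants committed memory, and the callee writes back the commit state it actually delivered (memory served from the reserved huge-page area is always committed, even when lazy commit was requested), as in the reworked `mi_region_commit_blocks` below. A minimal caller sketch under that convention -- illustrative only, assuming the internal headers and an `os_tld` are in scope:

    // Hedged sketch: request lazily-committed segment memory and record
    // whether the allocator upgraded the commit flag on the way out.
    static void* segment_mem_alloc(mi_os_tld_t* os_tld, bool* is_committed) {
      bool commit = false;     // in: lazy commit requested
      bool large  = false;     // in: regular OS pages are fine
      size_t memid = SIZE_MAX;
      void* p = _mi_mem_alloc_aligned(MI_SEGMENT_SIZE, MI_SEGMENT_SIZE,
                                      &commit, &large, &memid, os_tld);
      *is_committed = commit;  // out: true if already committed (e.g. huge reserved pages)
      return p;
    }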
-static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, bool commit, bool* large, void** p, size_t* id, mi_os_tld_t* tld) +static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, bool* commit, bool* allow_large, void** p, size_t* id, mi_os_tld_t* tld) { size_t mask = mi_region_block_mask(blocks,bitidx); mi_assert_internal(mask != 0); @@ -149,9 +150,16 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit if (info == 0) { bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); - bool region_large = region_commit && *large; - void* start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, ®ion_large, tld); - *large = region_large; + bool region_large = *allow_large; + void* start = NULL; + if (region_large) { + start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN); + if (start != NULL) { region_commit = true; } + } + if (start == NULL) { + start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, ®ion_large, tld); + } + mi_assert_internal(!(region_large && !*allow_large)); if (start == NULL) { // failure to allocate from the OS! unclaim the blocks and fail @@ -191,13 +199,22 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit // Commit the blocks to memory bool region_is_committed = false; - void* start = mi_region_info_read(info,large,®ion_is_committed); + bool region_is_large = false; + void* start = mi_region_info_read(info,®ion_is_large,®ion_is_committed); + mi_assert_internal(!(region_is_large && !*allow_large)); + void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); - if (commit && !region_is_committed) { + if (*commit && !region_is_committed) { + // ensure commit _mi_os_commit(blocks_start, mi_good_commit_size(size), tld->stats); // only commit needed size (unless using large OS pages) } + else if (!*commit && region_is_committed) { + // but even when no commit is requested, we might have committed anyway (in a huge OS page for example) + *commit = true; + } - // and return the allocation + // and return the allocation + *allow_large = region_is_large; *p = blocks_start; *id = (idx*MI_REGION_MAP_BITS) + bitidx; return true; @@ -241,7 +258,7 @@ static inline size_t mi_bsr(uintptr_t x) { // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written // if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). 
-static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool commit, bool* large, void** p, size_t* id, mi_os_tld_t* tld) +static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool* commit, bool* allow_large, void** p, size_t* id, mi_os_tld_t* tld) { mi_assert_internal(p != NULL && id != NULL); mi_assert_internal(blocks < MI_REGION_MAP_BITS); @@ -271,7 +288,7 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc else { // success, we claimed the bits // now commit the block memory -- this can still fail - return mi_region_commit_blocks(region, idx, bitidx, blocks, size, commit, large, p, id, tld); + return mi_region_commit_blocks(region, idx, bitidx, blocks, size, commit, allow_large, p, id, tld); } } else { @@ -294,27 +311,27 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written // if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, bool commit, bool* large, void** p, size_t* id, mi_os_tld_t* tld) +static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, bool* commit, bool* allow_large, void** p, size_t* id, mi_os_tld_t* tld) { // check if there are available blocks in the region.. mi_assert_internal(idx < MI_REGION_MAX); mem_region_t* region = ®ions[idx]; uintptr_t m = mi_atomic_read_relaxed(®ion->map); if (m != MI_REGION_MAP_FULL) { // some bits are zero - bool ok = (commit || *large); // committing or allow-large is always ok + bool ok = (*commit || *allow_large); // committing or allow-large is always ok if (!ok) { // otherwise skip incompatible regions if possible. // this is not guaranteed due to multiple threads allocating at the same time but - // that's ok. In secure mode, large is never allowed so that works out; otherwise - // we might just not be able to reset/decommit individual pages sometimes. + // that's ok. In secure mode, large is never allowed for any thread, so that works out; + // otherwise we might just not be able to reset/decommit individual pages sometimes. mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); bool is_large; bool is_committed; void* start = mi_region_info_read(info,&is_large,&is_committed); - ok = (start == NULL || (commit || !is_committed) || (*large || !is_large)); // Todo: test with one bitmap operation? + ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large)); // Todo: test with one bitmap operation? } if (ok) { - return mi_region_alloc_blocks(region, idx, blocks, size, commit, large, p, id, tld); + return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, p, id, tld); } } return true; // no error, but no success either @@ -326,7 +343,7 @@ static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, b // Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. 
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, size_t* id, mi_os_tld_t* tld) +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, size_t* id, mi_os_tld_t* tld) { mi_assert_internal(id != NULL && tld != NULL); mi_assert_internal(size > 0); @@ -336,7 +353,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool* la // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`) if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) { - return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, commit, large, tld); // round up size + return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, *commit, large, tld); // round up size } // always round size to OS page size multiple (so commit/decommit go over the entire range) @@ -371,6 +388,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool* la } else { tld->region_idx = idx; // next start of search? + } mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); @@ -378,10 +396,6 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool* la } -// Allocate `size` memory. Return non NULL on success, with a given memory `id`. -void* _mi_mem_alloc(size_t size, bool commit, bool* large, size_t* id, mi_os_tld_t* tld) { - return _mi_mem_alloc_aligned(size,0,commit,large,id,tld); -} /* ---------------------------------------------------------------------------- Free @@ -424,8 +438,11 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // if the memory is reused soon. // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large if (!is_large) { - // _mi_os_reset(p,size,stats); - // _mi_os_decommit(p,size,stats); // if !is_committed + if (mi_option_is_enabled(mi_option_segment_reset)) { + _mi_os_reset(p, size, stats); + // _mi_os_decommit(p,size,stats); // if !is_eager_committed + } + // else { _mi_os_reset(p,size,stats); } } if (!is_eager_committed) { // adjust commit statistics as we commit again when re-using the same slot diff --git a/src/options.c b/src/options.c index 1076ce1e..e5c0c96a 100644 --- a/src/options.c +++ b/src/options.c @@ -58,6 +58,7 @@ static mi_option_desc_t options[_mi_option_last] = #endif // the following options are experimental and not all combinations make sense. + { 0, UNINIT, MI_OPTION(lazy_commit) }, // the first N segments per thread are lazily committed { 1, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled #ifdef _WIN32 // and BSD? { 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) @@ -67,6 +68,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread + { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free { 0, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(cache_reset) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on diff --git a/src/os.c b/src/os.c index 76778123..58abafe0 100644 --- a/src/os.c +++ b/src/os.c @@ -35,9 +35,9 @@ terms of the MIT license. 
A copy of the license can be found in the file On windows initializes support for aligned allocation and large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). ----------------------------------------------------------- */ -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -bool _mi_os_is_huge_reserved(void* p); -static void* mi_os_alloc_from_huge_reserved(size_t size, size_t try_alignment, bool commit); +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); +bool _mi_os_is_huge_reserved(void* p); +void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); static void* mi_align_up_ptr(void* p, size_t alignment) { return (void*)_mi_align_up((uintptr_t)p, alignment); @@ -418,8 +418,8 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo if (!commit) allow_large = false; void* p = NULL; - if (allow_large) { - p = mi_os_alloc_from_huge_reserved(size, try_alignment, commit); + if (commit && allow_large) { + p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); if (p != NULL) { *is_large = true; return p; @@ -781,12 +781,11 @@ bool _mi_os_is_huge_reserved(void* p) { (uint8_t*)p < (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + mi_atomic_read(&os_huge_reserved.reserved)); } -static void* mi_os_alloc_from_huge_reserved(size_t size, size_t try_alignment, bool commit) +void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment) { // only allow large aligned allocations if (size < MI_SEGMENT_SIZE || (size % MI_SEGMENT_SIZE) != 0) return NULL; - if (try_alignment > MI_SEGMENT_SIZE) return NULL; - if (!commit) return NULL; + if (try_alignment > MI_SEGMENT_SIZE) return NULL; if (mi_atomic_read_ptr(&os_huge_reserved.start)==NULL) return NULL; if (mi_atomic_read(&os_huge_reserved.used) >= mi_atomic_read(&os_huge_reserved.reserved)) return NULL; // already full diff --git a/src/segment.c b/src/segment.c index 020d53e8..441d79b8 100644 --- a/src/segment.c +++ b/src/segment.c @@ -326,8 +326,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool eager = mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager || (page_kind > MI_PAGE_MEDIUM); + bool lazy = (tld->count < mi_option_get(mi_option_lazy_commit)); + bool commit = (!lazy && mi_option_is_enabled(mi_option_eager_commit)) || (page_kind > MI_PAGE_MEDIUM); bool protection_still_good = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { @@ -353,8 +353,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, else { // Allocate the segment from the OS size_t memid; - bool mem_large = (eager && !mi_option_is_enabled(mi_option_secure)); // only allow large OS pages once we are no longer lazy - segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, &memid, os_tld); + bool mem_large = (!lazy && !mi_option_is_enabled(mi_option_secure)); // only allow large OS pages once we are no longer lazy + segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { // ensure the initial info is committed From d381fcd9fa3b70778cda8894476886645778e3da Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 28 Aug 2019 12:09:23 -0700 Subject: [PATCH 34/40] rename lazy to eager_commit_delay --- include/mimalloc.h | 4 ++-- src/options.c | 4 ++-- src/segment.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 5cec05fa..15d06bef 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -225,16 +225,16 @@ typedef enum mi_option_e { mi_option_verbose, // the following options are experimental mi_option_secure, - mi_option_lazy_commit, mi_option_eager_commit, mi_option_eager_region_commit, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_segment_cache, - mi_option_segment_reset, mi_option_page_reset, mi_option_cache_reset, mi_option_reset_decommits, + mi_option_eager_commit_delay, + mi_option_segment_reset, mi_option_os_tag, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index e5c0c96a..8654550e 100644 --- a/src/options.c +++ b/src/options.c @@ -58,7 +58,6 @@ static mi_option_desc_t options[_mi_option_last] = #endif // the following options are experimental and not all combinations make sense. - { 0, UNINIT, MI_OPTION(lazy_commit) }, // the first N segments per thread are lazily committed { 1, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled #ifdef _WIN32 // and BSD? { 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) 
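In practice the renamed knob is set like any other mimalloc option, either through the environment (options are read as `MIMALLOC_` plus the upper-cased option name) or programmatically before the first allocation. A small usage sketch; the value `4` is an arbitrary example:

    #include <mimalloc.h>

    int main(void) {
      // equivalent to running with MIMALLOC_EAGER_COMMIT_DELAY=4:
      // the first 4 segments per thread are not eagerly committed
      mi_option_set(mi_option_eager_commit_delay, 4);
      void* p = mi_malloc(64);
      mi_free(p);
      return 0;
    }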
@@ -68,10 +67,11 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free { 0, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(cache_reset) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free { 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose }; diff --git a/src/segment.c b/src/segment.c index 441d79b8..3777e060 100644 --- a/src/segment.c +++ b/src/segment.c @@ -326,8 +326,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool lazy = (tld->count < mi_option_get(mi_option_lazy_commit)); - bool commit = (!lazy && mi_option_is_enabled(mi_option_eager_commit)) || (page_kind > MI_PAGE_MEDIUM); + bool eager = mi_option_is_enabled(mi_option_eager_commit) && (tld->count < mi_option_get(mi_option_eager_commit_delay)); + bool commit = eager || (page_kind > MI_PAGE_MEDIUM); bool protection_still_good = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { @@ -353,7 +353,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, else { // Allocate the segment from the OS size_t memid; - bool mem_large = (!lazy && !mi_option_is_enabled(mi_option_secure)); // only allow large OS pages once we are no longer lazy + bool mem_large = (eager && !mi_option_is_enabled(mi_option_secure)); // only allow large OS pages once we are no longer lazy segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { From 154fd471a111baec5afd36a66b49d9b0850d392c Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 29 Aug 2019 07:48:15 -0700 Subject: [PATCH 35/40] fix comparison warning --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 3777e060..d442d521 100644 --- a/src/segment.c +++ b/src/segment.c @@ -326,7 +326,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool eager = mi_option_is_enabled(mi_option_eager_commit) && (tld->count < mi_option_get(mi_option_eager_commit_delay)); + bool eager = mi_option_is_enabled(mi_option_eager_commit) && (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool commit = eager || (page_kind > MI_PAGE_MEDIUM); bool protection_still_good = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); From e8c750585390c3da5bc71f60c9ca7339bb77e20f Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 29 Aug 2019 07:49:40 -0700 Subject: [PATCH 36/40] only set has_aligned flag if really necessary --- src/alloc-aligned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 24f6c440..97f4319f 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -43,10 +43,10 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* heap, size_t size, size_t if (p == NULL) return NULL; // .. and align within the allocation - mi_page_set_has_aligned( _mi_ptr_page(p), true ); uintptr_t adjust = alignment - (((uintptr_t)p + offset) % alignment); mi_assert_internal(adjust % sizeof(uintptr_t) == 0); void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust)); + if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); mi_assert_internal( p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p),_mi_ptr_page(aligned_p),aligned_p) ); return aligned_p; From 64c1d6de8688c5d53165a6eed5d2ed3613191863 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 29 Aug 2019 07:50:35 -0700 Subject: [PATCH 37/40] fix mi_likely branch that was marked as unlikely --- src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index f45f43e3..74c3d88e 100644 --- a/src/page.c +++ b/src/page.c @@ -180,7 +180,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // and the local free list if (page->local_free != NULL) { - if (mi_unlikely(page->free == NULL)) { + if (mi_likely(page->free == NULL)) { // usual case page->free = page->local_free; page->local_free = NULL; From 4b39c0b06edae29ca9b0bc0ca9e3938538a76a13 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 29 Aug 2019 07:55:57 -0700 Subject: [PATCH 38/40] fix eager_delay test, and allow large OS pages even without eager commit enabled --- src/segment.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index d442d521..b03547b3 100644 --- a/src/segment.c +++ b/src/segment.c @@ -326,7 +326,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool eager = mi_option_is_enabled(mi_option_eager_commit) && (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (page_kind > MI_PAGE_MEDIUM); bool protection_still_good = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); @@ -353,7 +354,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, else { // Allocate the segment from the OS size_t memid; - bool mem_large = (eager && !mi_option_is_enabled(mi_option_secure)); // only allow large OS pages once we are no longer lazy + bool mem_large = (!eager_delay && !mi_option_is_enabled(mi_option_secure)); // only allow large OS pages once we are no longer lazy segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { From 4819d3f78f5c803961faa94b51fbe6e7179365f7 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 29 Aug 2019 09:01:00 -0700 Subject: [PATCH 39/40] expose mi_stats_merge function --- include/mimalloc.h | 1 + src/stats.c | 34 +++++++++++++++++----------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 15d06bef..9fd455da 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -110,6 +110,7 @@ mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; mi_decl_export void mi_collect(bool force) mi_attr_noexcept; mi_decl_export void mi_stats_print(FILE* out) mi_attr_noexcept; mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; +mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; mi_decl_export int mi_version(void) mi_attr_noexcept; mi_decl_export void mi_process_init(void) mi_attr_noexcept; diff --git a/src/stats.c b/src/stats.c index 292bc84b..1ecc8b3a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -11,19 +11,6 @@ terms of the MIT license. A copy of the license can be found in the file #include // memset -/* ----------------------------------------------------------- - Merge thread statistics with the main one. 
------------------------------------------------------------ */ - -static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src); - -void _mi_stats_done(mi_stats_t* stats) { - if (stats == &_mi_stats_main) return; - mi_stats_add(&_mi_stats_main, stats); - memset(stats,0,sizeof(*stats)); -} - - /* ----------------------------------------------------------- Statistics operations ----------------------------------------------------------- */ @@ -294,6 +281,13 @@ static mi_stats_t* mi_stats_get_default(void) { return &heap->tld->stats; } +static void mi_stats_merge_from(mi_stats_t* stats) { + if (stats != &_mi_stats_main) { + mi_stats_add(&_mi_stats_main, stats); + memset(stats, 0, sizeof(mi_stats_t)); + } +} + void mi_stats_reset(void) mi_attr_noexcept { mi_stats_t* stats = mi_stats_get_default(); if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); } @@ -301,11 +295,17 @@ void mi_stats_reset(void) mi_attr_noexcept { mi_time_start = _mi_clock_start(); } +void mi_stats_merge(void) mi_attr_noexcept { + mi_stats_merge_from( mi_stats_get_default() ); +} + +void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` + mi_stats_merge_from(stats); +} + + static void mi_stats_print_ex(mi_stats_t* stats, double secs, FILE* out) { - if (stats != &_mi_stats_main) { - mi_stats_add(&_mi_stats_main,stats); - memset(stats,0,sizeof(mi_stats_t)); - } + mi_stats_merge_from(stats); _mi_stats_print(&_mi_stats_main, secs, out); } From 7bf12c7b5fbb2aa7c156360a145545c7a45be90f Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 29 Aug 2019 09:42:50 -0700 Subject: [PATCH 40/40] make output function configurable; remove from standard includes --- include/mimalloc-internal.h | 2 +- include/mimalloc.h | 16 +++++++---- src/alloc-override-win.c | 1 + src/options.c | 57 +++++++++++++++++++++++-------------- src/stats.c | 26 ++++++++--------- 5 files changed, 61 insertions(+), 41 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 97619765..3ddb734d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -22,7 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file // "options.c" -void _mi_fprintf(FILE* out, const char* fmt, ...); +void _mi_fprintf(mi_output_fun* out, const char* fmt, ...); void _mi_error_message(const char* fmt, ...); void _mi_warning_message(const char* fmt, ...); void _mi_verbose_message(const char* fmt, ...); diff --git a/include/mimalloc.h b/include/mimalloc.h index 9fd455da..ed75f617 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -69,8 +69,8 @@ terms of the MIT license. 
A copy of the license can be found in the file // Includes // ------------------------------------------------------ +#include // size_t #include // bool -#include // FILE #ifdef __cplusplus extern "C" { @@ -107,19 +107,23 @@ mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; +typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); +mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free) mi_attr_noexcept; + +typedef void (mi_output_fun)(const char* msg); +mi_decl_export void mi_register_output(mi_output_fun* out) mi_attr_noexcept; + mi_decl_export void mi_collect(bool force) mi_attr_noexcept; -mi_decl_export void mi_stats_print(FILE* out) mi_attr_noexcept; +mi_decl_export int mi_version(void) mi_attr_noexcept; mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; -mi_decl_export int mi_version(void) mi_attr_noexcept; +mi_decl_export void mi_stats_print(mi_output_fun* out) mi_attr_noexcept; mi_decl_export void mi_process_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_done(void) mi_attr_noexcept; -mi_decl_export void mi_thread_stats_print(FILE* out) mi_attr_noexcept; +mi_decl_export void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept; -typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); -mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free) mi_attr_noexcept; // ------------------------------------------------------ // Aligned allocation diff --git a/src/alloc-override-win.c b/src/alloc-override-win.c index 0bd05deb..dc4796ab 100644 --- a/src/alloc-override-win.c +++ b/src/alloc-override-win.c @@ -16,6 +16,7 @@ terms of the MIT license. A copy of the license can be found in the file #include #include // getenv +#include // _setmaxstdio #include // strstr diff --git a/src/options.c b/src/options.c index 8654550e..cd9b4e48 100644 --- a/src/options.c +++ b/src/options.c @@ -134,6 +134,32 @@ void mi_option_disable(mi_option_t option) { } +static void mi_out_stderr(const char* msg) { + #ifdef _WIN32 + // on windows with redirection, the C runtime cannot handle locale dependent output + // after the main thread closes so we use direct console output. + _cputs(msg); + #else + fputs(msg, stderr); + #endif +} + +// -------------------------------------------------------- +// Default output handler +// -------------------------------------------------------- + +static volatile _Atomic(mi_output_fun*) mi_out_default; // = NULL + +static mi_output_fun* mi_out_get_default(void) { + mi_output_fun* out = (mi_output_fun*)mi_atomic_read_ptr(mi_atomic_cast(void*, &mi_out_default)); + return (out == NULL ? &mi_out_stderr : out); +} + +void mi_register_output(mi_output_fun* out) mi_attr_noexcept { + mi_atomic_write_ptr(mi_atomic_cast(void*,&mi_out_default),out); +} + + // -------------------------------------------------------- // Messages // -------------------------------------------------------- @@ -146,31 +172,20 @@ static mi_decl_thread bool recurse = false; // Define our own limited `fprintf` that avoids memory allocation. // We do this using `snprintf` with a limited buffer. 
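Before the reworked `mi_vfprintf` below: with the callback registered, an application can route all of mimalloc's diagnostic and statistics output into its own logging. A minimal usage sketch:

    #include <mimalloc.h>
    #include <stdio.h>

    static void my_output(const char* msg) {
      // mimalloc hands over plain strings (possibly in several chunks);
      // forward each chunk to our own sink
      fprintf(stderr, "[mimalloc] %s", msg);
    }

    int main(void) {
      mi_register_output(&my_output);
      mi_stats_print(NULL);   // NULL falls back to the registered (or default) output
      return 0;
    }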
-static void mi_vfprintf( FILE* out, const char* prefix, const char* fmt, va_list args ) { +static void mi_vfprintf( mi_output_fun* out, const char* prefix, const char* fmt, va_list args ) { char buf[256]; if (fmt==NULL) return; if (_mi_preloading() || recurse) return; recurse = true; - if (out==NULL) out = stdout; + if (out==NULL) out = mi_out_get_default(); vsnprintf(buf,sizeof(buf)-1,fmt,args); - #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output - // after the main thread closes so use direct console output. - if (out==stderr) { - if (prefix != NULL) _cputs(prefix); - _cputs(buf); - } - else - #endif - { - if (prefix != NULL) fputs(prefix,out); - fputs(buf,out); - } + if (prefix != NULL) out(prefix); + out(buf); recurse = false; return; } -void _mi_fprintf( FILE* out, const char* fmt, ... ) { +void _mi_fprintf( mi_output_fun* out, const char* fmt, ... ) { va_list args; va_start(args,fmt); mi_vfprintf(out,NULL,fmt,args); @@ -181,7 +196,7 @@ void _mi_trace_message(const char* fmt, ...) { if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher va_list args; va_start(args, fmt); - mi_vfprintf(stderr, "mimalloc: ", fmt, args); + mi_vfprintf(NULL, "mimalloc: ", fmt, args); va_end(args); } @@ -189,7 +204,7 @@ void _mi_verbose_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_verbose)) return; va_list args; va_start(args,fmt); - mi_vfprintf(stderr, "mimalloc: ", fmt, args); + mi_vfprintf(NULL, "mimalloc: ", fmt, args); va_end(args); } @@ -198,7 +213,7 @@ void _mi_error_message(const char* fmt, ...) { if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; va_list args; va_start(args,fmt); - mi_vfprintf(stderr, "mimalloc: error: ", fmt, args); + mi_vfprintf(NULL, "mimalloc: error: ", fmt, args); va_end(args); mi_assert(false); } @@ -208,14 +223,14 @@ void _mi_warning_message(const char* fmt, ...) { if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; va_list args; va_start(args,fmt); - mi_vfprintf(stderr, "mimalloc: warning: ", fmt, args); + mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args); va_end(args); } #if MI_DEBUG void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { - _mi_fprintf(stderr,"mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); + _mi_fprintf(NULL,"mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); abort(); } #endif diff --git a/src/stats.c b/src/stats.c index 1ecc8b3a..37a7bde4 100644 --- a/src/stats.c +++ b/src/stats.c @@ -8,6 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-internal.h" #include "mimalloc-atomic.h" +#include // fputs, stderr #include // memset @@ -120,7 +121,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { Display statistics ----------------------------------------------------------- */ -static void mi_printf_amount(int64_t n, int64_t unit, FILE* out, const char* fmt) { +static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const char* fmt) { char buf[32]; int len = 32; const char* suffix = (unit <= 0 ? 
" " : "b"); @@ -141,16 +142,16 @@ static void mi_printf_amount(int64_t n, int64_t unit, FILE* out, const char* fmt } -static void mi_print_amount(int64_t n, int64_t unit, FILE* out) { +static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out) { mi_printf_amount(n,unit,out,NULL); } -static void mi_print_count(int64_t n, int64_t unit, FILE* out) { +static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out) { if (unit==1) _mi_fprintf(out,"%11s"," "); else mi_print_amount(n,0,out); } -static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, FILE* out ) { +static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out ) { _mi_fprintf(out,"%10s:", msg); if (unit>0) { mi_print_amount(stat->peak, unit, out); @@ -179,24 +180,24 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t } } -static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, FILE* out ) { +static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out ) { _mi_fprintf(out, "%10s:", msg); mi_print_amount(stat->total, -1, out); _mi_fprintf(out, "\n"); } -static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, FILE* out) { +static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out) { double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count); _mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg); } -static void mi_print_header( FILE* out ) { +static void mi_print_header(mi_output_fun* out ) { _mi_fprintf(out,"%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count "); } #if MI_STAT>1 -static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, FILE* out) { +static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out) { bool found = false; char buf[64]; for (size_t i = 0; i <= max; i++) { @@ -220,8 +221,7 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); -static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_noexcept { - if (out == NULL) out = stderr; +static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_attr_noexcept { mi_print_header(out); #if MI_STAT>1 mi_stat_count_t normal = { 0,0,0,0 }; @@ -304,16 +304,16 @@ void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` } -static void mi_stats_print_ex(mi_stats_t* stats, double secs, FILE* out) { +static void mi_stats_print_ex(mi_stats_t* stats, double secs, mi_output_fun* out) { mi_stats_merge_from(stats); _mi_stats_print(&_mi_stats_main, secs, out); } -void mi_stats_print(FILE* out) mi_attr_noexcept { +void mi_stats_print(mi_output_fun* out) mi_attr_noexcept { mi_stats_print_ex(mi_stats_get_default(),_mi_clock_end(mi_time_start),out); } -void mi_thread_stats_print(FILE* out) mi_attr_noexcept { +void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept { _mi_stats_print(mi_stats_get_default(), _mi_clock_end(mi_time_start), out); }
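The net effect of the statistics changes is that a program controls when per-thread counters are folded into the main totals, instead of waiting for thread exit. A closing usage sketch:

    #include <mimalloc.h>

    static void do_work(void) {
      void* p = mi_malloc(1024);
      mi_free(p);
      // fold this thread's statistics into the main stats now,
      // rather than implicitly at mi_thread_done()
      mi_stats_merge();
    }

    int main(void) {
      do_work();
      mi_stats_print(NULL);   // print the merged totals via the default/registered output
      return 0;
    }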