From 6165177cb1461c49df3d3baca04d0da966544d51 Mon Sep 17 00:00:00 2001
From: Kirsten Lee
Date: Tue, 20 Aug 2019 16:19:16 -0700
Subject: [PATCH 01/40] provide a set of solutions that use the vs2019
 vcruntime

---
 ide/vs2019/mimalloc-override-test.vcxproj | 165 ++++++++++++++
 ide/vs2019/mimalloc-override.vcxproj      | 243 +++++++++++++++++++++
 ide/vs2019/mimalloc-test-stress.vcxproj   | 159 ++++++++++++++
 ide/vs2019/mimalloc-test.vcxproj          | 163 ++++++++++++++
 ide/vs2019/mimalloc.sln                   |  71 ++++++
 ide/vs2019/mimalloc.vcxproj               | 252 ++++++++++++++++++++++
 6 files changed, 1053 insertions(+)
 create mode 100644 ide/vs2019/mimalloc-override-test.vcxproj
 create mode 100644 ide/vs2019/mimalloc-override.vcxproj
 create mode 100644 ide/vs2019/mimalloc-test-stress.vcxproj
 create mode 100644 ide/vs2019/mimalloc-test.vcxproj
 create mode 100644 ide/vs2019/mimalloc.sln
 create mode 100644 ide/vs2019/mimalloc.vcxproj

diff --git a/ide/vs2019/mimalloc-override-test.vcxproj b/ide/vs2019/mimalloc-override-test.vcxproj
new file mode 100644
index 00000000..d75a67e1
--- /dev/null
+++ b/ide/vs2019/mimalloc-override-test.vcxproj
@@ -0,0 +1,165 @@
+<!-- 165 lines of MSBuild XML; the element markup was lost in text extraction.
+     Recoverable settings: Debug/Release x Win32/x64 configurations; ToolsVersion 15.0;
+     ProjectGuid {FEF7868F-750E-4C21-A04D-22707CC66879}; WindowsTargetPlatformVersion 10.0;
+     ConfigurationType Application; PlatformToolset v142 (WholeProgramOptimization in Release);
+     output and intermediate dirs under $(ProjectDir)..\..\out\msvc-$(Platform)\...;
+     include dir ..\..\include; runtime MultiThreadedDebugDLL (Debug) resp. MultiThreadedDLL
+     with _MBCS;NDEBUG (Release); SubSystem Console; links kernel32.lib;user32.lib;gdi32.lib;
+     winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;
+     odbc32.lib;odbccp32.lib; references project {abb5eae7-b3e6-432e-b636-333449892ea7}
+     (mimalloc-override). -->
\ No newline at end of file
diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj
new file mode 100644
index 00000000..1f3b7ee2
--- /dev/null
+++ b/ide/vs2019/mimalloc-override.vcxproj
@@ -0,0 +1,243 @@
+<!-- 243 lines of MSBuild XML; markup lost in extraction. Recoverable settings:
+     ToolsVersion 15.0; ProjectGuid {ABB5EAE7-B3E6-432E-B636-333449892EA7}; name
+     mimalloc-override; WindowsTargetPlatformVersion 10.0; ConfigurationType DynamicLibrary
+     (TargetExt .dll) for all four Debug/Release x Win32/x64 configurations; PlatformToolset
+     v142; output to $(SolutionDir)..\..\out\msvc-$(Platform)\...; include dir ../../include;
+     defines MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;_MBCS (plus NDEBUG in
+     Release); runtime MultiThreadedDebugDLL resp. MultiThreadedDLL; EntryPoint DllEntry;
+     links kernel32.lib; Release writes an AssemblyAndSourceCode listing to $(IntDir);
+     per-file build settings for the override sources (file names not recoverable from
+     the garbled text). -->
\ No newline at end of file
diff --git a/ide/vs2019/mimalloc-test-stress.vcxproj b/ide/vs2019/mimalloc-test-stress.vcxproj
new file mode 100644
index 00000000..6aed1cc1
--- /dev/null
+++ b/ide/vs2019/mimalloc-test-stress.vcxproj
@@ -0,0 +1,159 @@
+<!-- 159 lines of MSBuild XML; markup lost in extraction. Recoverable settings:
+     ToolsVersion 15.0; ProjectGuid {FEF7958F-750E-4C21-A04D-22707CC66878}; name
+     mimalloc-test-stress; WindowsTargetPlatformVersion 10.0; ConfigurationType Application;
+     PlatformToolset v142; Debug/Release x Win32/x64; output under
+     $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\;
+     include dir ..\..\include; NDEBUG in Release; SubSystem Console; references project
+     {abb5eae7-b3e6-432e-b636-333449892ea6} (mimalloc). -->
\ No newline at end of file
diff --git a/ide/vs2019/mimalloc-test.vcxproj b/ide/vs2019/mimalloc-test.vcxproj
new file mode 100644
index 00000000..1e901e45
--- /dev/null
+++ b/ide/vs2019/mimalloc-test.vcxproj
@@ -0,0 +1,163 @@
+<!-- 163 lines of MSBuild XML; markup lost in extraction. Recoverable settings:
+     ToolsVersion 15.0; ProjectGuid {FEF7858F-750E-4C21-A04D-22707CC66878}; RootNamespace
+     mimalloctest; project name mimalloc-test; WindowsTargetPlatformVersion 10.0;
+     ConfigurationType Application; PlatformToolset v142; Debug/Release x Win32/x64;
+     output under $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\;
+     include dir ..\..\include; LanguageStandard stdcpp17; _MBCS;NDEBUG in Release;
+     SubSystem Console; AssemblyAndSourceCode listing; references project
+     {abb5eae7-b3e6-432e-b636-333449892ea6} (mimalloc). -->
\ No newline at end of file
diff --git a/ide/vs2019/mimalloc.sln b/ide/vs2019/mimalloc.sln
new file mode 100644
index 00000000..aeab6b88
--- /dev/null
+++ b/ide/vs2019/mimalloc.sln
@@ -0,0 +1,71 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 15
+VisualStudioVersion = 15.0.28010.2016
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc", "mimalloc.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA6}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test", "mimalloc-test.vcxproj", "{FEF7858F-750E-4C21-A04D-22707CC66878}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override", "mimalloc-override.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA7}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override-test", "mimalloc-override-test.vcxproj", "{FEF7868F-750E-4C21-A04D-22707CC66879}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test-stress", "mimalloc-test-stress.vcxproj", "{FEF7958F-750E-4C21-A04D-22707CC66878}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x64.ActiveCfg = Debug|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x64.Build.0 = Debug|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x86.ActiveCfg = Debug|Win32
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x86.Build.0 = Debug|Win32
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x64.ActiveCfg = Release|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x64.Build.0 = Release|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x86.ActiveCfg = Release|Win32
+		{ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x86.Build.0 = Release|Win32
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32
+		{FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x64.ActiveCfg = Debug|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x64.Build.0 = Debug|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x86.ActiveCfg = Debug|Win32
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x86.Build.0 = Debug|Win32
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x64.ActiveCfg = Release|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x64.Build.0 = Release|x64
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x86.ActiveCfg = Release|Win32
+		{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x86.Build.0 = Release|Win32
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x64.ActiveCfg = Debug|x64
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x64.Build.0 = Debug|x64
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x86.ActiveCfg = Debug|Win32
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x86.Build.0 = Debug|Win32
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x64.ActiveCfg = Release|x64
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x64.Build.0 = Release|x64
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x86.ActiveCfg = Release|Win32
+		{FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x86.Build.0 = Release|Win32
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32
+		{FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {4297F93D-486A-4243-995F-7D32F59AE82A}
+	EndGlobalSection
+EndGlobal
diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj
new file mode 100644
index 00000000..4d9563c2
--- /dev/null
+++ b/ide/vs2019/mimalloc.vcxproj
@@ -0,0 +1,252 @@
+<!-- 252 lines of MSBuild XML; markup lost in extraction. Recoverable settings:
+     ToolsVersion 15.0; ProjectGuid {ABB5EAE7-B3E6-432E-B636-333449892EA6}; name mimalloc;
+     WindowsTargetPlatformVersion 10.0; ConfigurationType StaticLibrary (TargetExt .lib)
+     for Debug/Release x Win32/x64; PlatformToolset v142; output to
+     $(SolutionDir)..\..\out\msvc-$(Platform)\...; include dir ../../include; Debug defines
+     MI_DEBUG=3; Release defines NDEBUG with MaxSpeed, AnySuitable inlining, intrinsics
+     disabled (Neither), and an AssemblyAndSourceCode listing to $(IntDir); per-file
+     ClCompile/ClInclude settings for the library sources (file names not recoverable
+     from the garbled text). -->
\ No newline at end of file

From eb25093b13b57cb83113527b7df47fcfb1a427c3 Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Wed, 21 Aug 2019 09:40:57 -0700
Subject: [PATCH 02/40] fix mi_cdecl for older clang versions

---
 include/mimalloc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mimalloc.h b/include/mimalloc.h
index 6615e2e2..9f27e463 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -53,8 +53,8 @@ terms of the MIT license. A copy of the license can be found in the file
 #else
   #define mi_attr_alloc_size(s)       __attribute__((alloc_size(s)))
   #define mi_attr_alloc_size2(s1,s2)  __attribute__((alloc_size(s1,s2)))
-  #define mi_cdecl      // leads to warnings... __attribute__((cdecl))
 #endif
+#define mi_cdecl        // leads to warnings... __attribute__((cdecl))
 #else
 #define mi_decl_thread  __thread
 #define mi_decl_export
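The change above only moves one line, but the reason is subtle: `mi_cdecl` must expand to a calling-convention keyword where one exists and to nothing everywhere else, and older clang versions warn on `__attribute__((cdecl))`. A minimal standalone sketch of the same macro pattern (illustrative names, not mimalloc's actual header):

    /* Sketch only: how a portable calling-convention macro is typically set up. */
    #if defined(_MSC_VER)
      #define my_cdecl __cdecl
    #else
      #define my_cdecl   /* empty: some compilers warn on __attribute__((cdecl)) */
    #endif

    typedef void (my_cdecl my_callback_fun)(void* arg);  /* function-pointer typedef */
    void my_cdecl my_handler(void* arg);                 /* plain declaration */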
From ee475fd8cd6005cecb2de3f7e5b87ceeab609095 Mon Sep 17 00:00:00 2001
From: Jakub Szymanski
Date: Wed, 21 Aug 2019 11:11:36 -0700
Subject: [PATCH 03/40] add warning when no available mem

---
 src/os.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/os.c b/src/os.c
index b15d58d0..bcce5d7d 100644
--- a/src/os.c
+++ b/src/os.c
@@ -257,6 +257,9 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
   if (p == NULL) {
     p = mi_win_virtual_allocx(addr, size, try_alignment, flags);
   }
+  if (p == NULL) {
+    _mi_warning_message("unable to alloc mem error: err: %i size: 0x%x \n", GetLastError(), size);
+  }
   return p;
 }

From 5d3bf1c8440ca300145ab6bb4d09cc3cef6257cc Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 11:22:35 -0700
Subject: [PATCH 04/40] don't commit or reset in huge OS pages

---
 src/os.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/os.c b/src/os.c
index bcce5d7d..9edcd064 100644
--- a/src/os.c
+++ b/src/os.c
@@ -539,7 +539,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative,
   // page align in the range, commit liberally, decommit conservative
   size_t csize;
   void* start = mi_os_page_align_areax(conservative, addr, size, &csize);
-  if (csize == 0) return true;
+  if (csize == 0 || mi_os_is_huge_reserved(addr)) return true;
   int err = 0;
   if (commit) {
     _mi_stat_increase(&stats->committed, csize);
@@ -591,7 +591,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
   // page align conservatively within the range
   size_t csize;
   void* start = mi_os_page_align_area_conservative(addr, size, &csize);
-  if (csize == 0) return true;
+  if (csize == 0 || mi_os_is_huge_reserved(addr)) return true;
   if (reset) _mi_stat_increase(&stats->reset, csize);
   else _mi_stat_decrease(&stats->reset, csize);
   if (!reset) return true; // nothing to do on unreset!
@@ -659,7 +659,9 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) {
   size_t csize = 0;
   void* start = mi_os_page_align_area_conservative(addr, size, &csize);
   if (csize == 0) return false;
-
+  if (mi_os_is_huge_reserved(addr)) {
+    _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
+  }
   int err = 0;
 #ifdef _WIN32
   DWORD oldprotect = 0;
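Patch 04 guards commit/decommit, reset, and mprotect with `mi_os_is_huge_reserved` so memory inside the pre-reserved huge-OS-page area is never touched by those calls. A minimal sketch of such a range guard, assuming a single contiguous reserved area (the globals and function names below are hypothetical stand-ins, not mimalloc's):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    static void*  huge_start = NULL;  /* set once when huge pages are reserved */
    static size_t huge_size  = 0;

    /* true if addr lies inside the reserved huge-page area */
    static bool is_huge_reserved(const void* addr) {
      return (huge_start != NULL &&
              (const uint8_t*)addr >= (const uint8_t*)huge_start &&
              (const uint8_t*)addr <  (const uint8_t*)huge_start + huge_size);
    }

    static bool os_reset(void* addr, size_t size) {
      (void)size;
      if (is_huge_reserved(addr)) return true;  /* never reset/decommit huge OS pages */
      /* ... the real madvise(MADV_FREE) / VirtualAlloc(MEM_RESET) would go here ... */
      return true;
    }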
From a1c5218ff52e2b187936e6f8cd77a4f8733458e8 Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 11:33:06 -0700
Subject: [PATCH 05/40] use 4TiB area on windows 64-bit for aligned allocation

---
 src/os.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/os.c b/src/os.c
index 9edcd064..5d872036 100644
--- a/src/os.c
+++ b/src/os.c
@@ -220,7 +220,18 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment,
       _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) page instead (error %lx)\n", err);
     }
   }
-
+#endif
+#if (MI_INTPTR_SIZE >= 8)
+  // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations
+  static volatile intptr_t aligned_base = ((intptr_t)4 << 40); // starting at 4TiB
+  if (addr == NULL && try_alignment > 0 && try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE) == 0) {
+    intptr_t hint = mi_atomic_add(&aligned_base, size) - size;
+    if (hint%try_alignment == 0) {
+      return VirtualAlloc((void*)hint, size, flags, PAGE_READWRITE);
+    }
+  }
+#endif
+#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
   // on modern Windows try use VirtualAlloc2 for aligned allocation
   if (try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
     MEM_ADDRESS_REQUIREMENTS reqs = { 0 };
@@ -781,7 +792,7 @@ int mi_reserve_huge_os_pages( size_t pages, double max_secs ) mi_attr_noexcept
   // Allocate one page at the time but try to place them contiguously
   // We allocate one page at the time to be able to abort if it takes too long
   double start_t = _mi_clock_start();
-  uint8_t* start = (uint8_t*)((uintptr_t)8 << 40); // 8TiB virtual start address
+  uint8_t* start = (uint8_t*)((uintptr_t)16 << 40); // 16TiB virtual start address
   uint8_t* addr = start;  // current top of the allocations
   for (size_t page = 0; page < pages; page++, addr += MI_HUGE_OS_PAGE_SIZE ) {
     // allocate large pages
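The first hunk reserves the virtual address range starting at 4TiB and hands out 4MiB-aligned hints with a single atomic add; because both the start address and every step are segment-size multiples, each successful hint is automatically aligned. A self-contained sketch of the same idea in C11 atomics (illustrative names; mimalloc uses its own `mi_atomic_add`, which returns the new value rather than the old one):

    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdint.h>

    #define SEG_SIZE ((size_t)4 << 20)                       /* 4MiB segments */

    static _Atomic uintptr_t hint_base = (uintptr_t)4 << 40; /* start at 4TiB */

    /* returns an aligned address hint, or NULL when the size does not fit the scheme */
    static void* next_aligned_hint(size_t size) {
      if (size == 0 || (size % SEG_SIZE) != 0) return NULL;
      uintptr_t hint = atomic_fetch_add(&hint_base, size);   /* old value = our slot */
      return ((hint % SEG_SIZE) == 0) ? (void*)hint : NULL;
    }

The hint is merely passed to VirtualAlloc as a preferred address; if the OS cannot satisfy it, allocation falls through to the unaligned path.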
From b51130970986dac99959f7dca1164bcfc2296a5f Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 13:44:43 -0700
Subject: [PATCH 06/40] ensure volatile declaration for abandoned_next field

---
 include/mimalloc-types.h |  2 +-
 src/page-queue.c         |  1 +
 src/segment.c            | 16 +++++++++-------
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index c0778f87..dd1f05e3 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -204,7 +204,7 @@ typedef enum mi_page_kind_e {
 typedef struct mi_segment_s {
   struct mi_segment_s* next;
   struct mi_segment_s* prev;
-  struct mi_segment_s* abandoned_next;
+  volatile struct mi_segment_s* abandoned_next;
   size_t          abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
   size_t          used;      // count of pages in use (`used <= capacity`)
   size_t          capacity;  // count of available pages (`#free + used`)
diff --git a/src/page-queue.c b/src/page-queue.c
index e476403b..859b1d57 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -130,6 +130,7 @@ extern inline uint8_t _mi_bin(size_t size) {
     // - adjust with 3 because we use do not round the first 8 sizes
     //   which each get an exact bin
     bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
+    mi_assert_internal(bin < MI_BIN_HUGE);
   }
   mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE);
   return bin;
diff --git a/src/segment.c b/src/segment.c
index d5a2288a..6379b24a 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -558,13 +558,15 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_segment_remove_from_free_queue(segment,tld);
   mi_assert_internal(segment->next == NULL && segment->prev == NULL);
   // all pages in the segment are abandoned; add it to the abandoned list
-  segment->thread_id = 0;
-  do {
-    segment->abandoned_next = (mi_segment_t*)abandoned;
-  } while (!mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment, segment->abandoned_next));
-  mi_atomic_increment(&abandoned_count);
-  _mi_stat_increase(&tld->stats->segments_abandoned,1);
+  _mi_stat_increase(&tld->stats->segments_abandoned, 1);
   mi_segments_track_size(-((long)segment->segment_size), tld);
+  segment->thread_id = 0;
+  mi_segment_t* next;
+  do {
+    next = (mi_segment_t*)abandoned;
+    mi_atomic_write_ptr((volatile void**)&segment->abandoned_next, next);
+  } while (!mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment, next));
+  mi_atomic_increment(&abandoned_count);
 }

 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
@@ -598,7 +600,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
     mi_segment_t* segment;
     do {
      segment = (mi_segment_t*)abandoned;
-    } while(segment != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment->abandoned_next, segment));
+    } while(segment != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, (mi_segment_t*)segment->abandoned_next, segment));
    if (segment==NULL) break; // stop early if no more segments available

    // got it.
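The rewritten `mi_segment_abandon` is a classic lock-free stack push: write the node's `next` pointer first, then publish the node with a compare-and-swap on the shared head, retrying with a freshly read head on failure. A standalone sketch using C11 atomics (illustrative types, not mimalloc's):

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct node_s {
      struct node_s* next;
    } node_t;

    static node_t* _Atomic stack_head = NULL;

    static void stack_push(node_t* n) {
      node_t* old = atomic_load(&stack_head);
      do {
        n->next = old;               /* publish next BEFORE the CAS makes n visible */
      } while (!atomic_compare_exchange_weak(&stack_head, &old, n));
    }

    /* pop is what _mi_segment_try_reclaim_abandoned does; note that a
       production version must also consider the ABA problem */
    static node_t* stack_pop(void) {
      node_t* n = atomic_load(&stack_head);
      while (n != NULL && !atomic_compare_exchange_weak(&stack_head, &n, n->next)) { }
      return n;
    }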
From acde83543f71f8fda793df196236c62d1864c04a Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 14:08:00 -0700
Subject: [PATCH 07/40] remove threadid from pages and keep page flags separate

---
 include/mimalloc-internal.h | 25 +++++--------------------
 include/mimalloc-types.h    | 30 ++++++++++++++++--------------
 src/alloc.c                 | 10 +++++-----
 src/init.c                  |  9 +++++----
 src/os.c                    |  2 +-
 src/page.c                  |  1 -
 src/segment.c               | 18 +++++------------
 7 files changed, 37 insertions(+), 58 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 0bdc1ab6..fa157b76 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -317,39 +317,24 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size)
 }

+
 //-----------------------------------------------------------
 // Page flags
 //-----------------------------------------------------------
-static inline uintptr_t mi_page_thread_id(const mi_page_t* page) {
-  return (page->flags & ~MI_PAGE_FLAGS_MASK);
-}
-
-static inline void mi_page_init_flags(mi_page_t* page, uintptr_t thread_id) {
-  mi_assert_internal((thread_id & MI_PAGE_FLAGS_MASK) == 0);
-  page->flags = thread_id;
-}
-
-static inline void mi_page_set_thread_id(mi_page_t* page, uintptr_t thread_id) {
-  mi_assert_internal((thread_id & MI_PAGE_FLAGS_MASK) == 0);
-  page->flags = thread_id | (page->flags & MI_PAGE_FLAGS_MASK);
-}
-
 static inline bool mi_page_is_in_full(const mi_page_t* page) {
-  return ((page->flags & 0x01) != 0);
+  return page->flags.in_full;
 }

 static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
-  if (in_full) page->flags |= 0x01;
-  else page->flags &= ~0x01;
+  page->flags.in_full = in_full;
 }

 static inline bool mi_page_has_aligned(const mi_page_t* page) {
-  return ((page->flags & 0x02) != 0);
+  return page->flags.has_aligned;
 }

 static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
-  if (has_aligned) page->flags |= 0x02;
-  else page->flags &= ~0x02;
+  page->flags.has_aligned = has_aligned;
 }
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index dd1f05e3..c20b663a 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -124,12 +124,15 @@ typedef enum mi_delayed_e {
 } mi_delayed_t;

-// Use the bottom 2 bits for the `in_full` and `has_aligned` flags
-// and the rest for the threadid (we assume tid's never use those lower 2 bits).
-// This allows a single test in `mi_free` to check for unlikely cases
-// (namely, non-local free, aligned free, or freeing in a full page)
-#define MI_PAGE_FLAGS_MASK  ((uintptr_t)0x03)
-typedef uintptr_t mi_page_flags_t;
+// The `in_full` and `has_aligned` page flags are put in a union to efficiently
+// test if both are false (`value == 0`) in the `mi_free` routine.
+typedef union mi_page_flags_u {
+  uint16_t value;
+  struct {
+    bool in_full;
+    bool has_aligned;
+  };
+} mi_page_flags_t;

 // Thread free list.
 // We use the bottom 2 bits of the pointer for mi_delayed_t flags
@@ -163,12 +166,12 @@ typedef struct mi_page_s {
   // layout like this to optimize access in `mi_malloc` and `mi_free`
   uint16_t              capacity;  // number of blocks committed
   uint16_t              reserved;  // number of blocks reserved in memory
-  // 16 bits padding
+  mi_page_flags_t       flags;     // `in_full` and `has_aligned` flags (16 bits)
+
   mi_block_t*           free;      // list of available free blocks (`malloc` allocates from this list)
   #if MI_SECURE
   uintptr_t             cookie;    // random cookie to encode the free lists
   #endif
-  mi_page_flags_t       flags;     // threadid:62 | has_aligned:1 | in_full:1
   size_t                used;      // number of blocks in use (including blocks in `local_free` and `thread_free`)

   mi_block_t*           local_free; // list of deferred free blocks by this thread (migrates to `free`)
@@ -181,12 +184,11 @@ typedef struct mi_page_s {
   struct mi_page_s*     next;      // next page owned by this thread with the same `block_size`
   struct mi_page_s*     prev;      // previous page owned by this thread with the same `block_size`

-// improve page index calculation
-#if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
-  void*                 padding[1]; // 12 words on 64-bit
-#elif MI_INTPTR_SIZE==4
-  // void*              padding[1]; // 12 words on 32-bit
-#endif
+  // improve page index calculation
+  // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds one word
+  #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0)
+  void*                 padding[1]; // 12 words on 64-bit in secure mode, 12 words on 32-bit plain
+  #endif
 } mi_page_t;
diff --git a/src/alloc.c b/src/alloc.c
index 9be2ef40..b7881ea5 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -225,19 +225,19 @@ void mi_free(void* p) mi_attr_noexcept
   }
 #endif

+  const uintptr_t tid = _mi_thread_id();
   mi_page_t* const page = _mi_segment_page_of(segment, p);

 #if (MI_STAT>1)
   mi_heap_t* heap = mi_heap_get_default();
-  mi_heap_stat_decrease( heap, malloc, mi_usable_size(p));
+  mi_heap_stat_decrease(heap, malloc, mi_usable_size(p));
   if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) {
-    mi_heap_stat_decrease( heap, normal[_mi_bin(page->block_size)], 1);
+    mi_heap_stat_decrease(heap, normal[_mi_bin(page->block_size)], 1);
   }
   // huge page stat is accounted for in `_mi_page_retire`
 #endif

-  const uintptr_t tid = _mi_thread_id();
-  if (mi_likely(tid == page->flags)) { // if equal, the thread id matches and it is not a full page, nor has aligned blocks
+  if (mi_likely(tid == segment->thread_id && page->flags.value == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
     // local, and not full or aligned
     mi_block_t* block = (mi_block_t*)p;
     mi_block_set_next(page, block, page->local_free);
@@ -247,7 +247,7 @@ void mi_free(void* p) mi_attr_noexcept
   }
   else {
     // non-local, aligned blocks, or a full page; use the more generic path
-    mi_free_generic(segment, page, tid == mi_page_thread_id(page), p);
+    mi_free_generic(segment, page, tid == segment->thread_id, p);
   }
 }
diff --git a/src/init.c b/src/init.c
index a2e7a9d2..ceb84433 100644
--- a/src/init.c
+++ b/src/init.c
@@ -13,15 +13,16 @@ terms of the MIT license. A copy of the license can be found in the file
 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
   0, false, false, false, 0, 0,
+  { 0 },
   NULL,      // free
   #if MI_SECURE
   0,
   #endif
-  0, 0,      // flags, used
+  0,         // used
   NULL, 0, 0,
   0, NULL, NULL, NULL
-  #if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
-  , { NULL }
+  #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0)
+  , { NULL } // padding
   #endif
 };
@@ -350,7 +351,7 @@ void mi_thread_init(void) mi_attr_noexcept
     pthread_setspecific(mi_pthread_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_pthread_done` is called
   #endif

-  #if (MI_DEBUG>0) // not in release mode as that leads to crashes on Windows dynamic override
+  #if (MI_DEBUG>0) && !defined(NDEBUG)  // not in release mode as that leads to crashes on Windows dynamic override
   _mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
   #endif
 }
diff --git a/src/os.c b/src/os.c
index 5d872036..b39c667a 100644
--- a/src/os.c
+++ b/src/os.c
@@ -217,7 +217,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment,
     }
     else {
       // else fall back to regular large OS pages
-      _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) page instead (error %lx)\n", err);
+      _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %lx)\n", err);
     }
   }
 #endif
diff --git a/src/page.c b/src/page.c
index 049ce10a..2da75119 100644
--- a/src/page.c
+++ b/src/page.c
@@ -75,7 +75,6 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
   mi_segment_t* segment = _mi_page_segment(page);
   uint8_t* start = _mi_page_start(segment,page,NULL);
   mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL));
-  mi_assert_internal(segment->thread_id==0 || segment->thread_id == mi_page_thread_id(page));
   //mi_assert_internal(start + page->capacity*page->block_size == page->top);
   mi_assert_internal(mi_page_list_is_valid(page,page->free));
diff --git a/src/segment.c b/src/segment.c
index 6379b24a..b1a5221c 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -549,14 +549,11 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(segment->used > 0);
   mi_assert_internal(segment->abandoned_next == NULL);
   mi_assert_expensive(mi_segment_is_valid(segment));
-#if MI_DEBUG>1
-  for (size_t i = 0; i < segment->capacity; i++) {
-    mi_assert_internal(!segment->pages[i].segment_in_use || mi_page_thread_id(&segment->pages[i]) == 0);
-  }
-#endif
+
   // remove the segment from the free page queue if needed
   mi_segment_remove_from_free_queue(segment,tld);
   mi_assert_internal(segment->next == NULL && segment->prev == NULL);
+
   // all pages in the segment are abandoned; add it to the abandoned list
   _mi_stat_increase(&tld->stats->segments_abandoned, 1);
   mi_segments_track_size(-((long)segment->segment_size), tld);
@@ -570,11 +567,10 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
 }

 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
-  mi_assert(page != NULL && mi_page_thread_id(page) != 0);
+  mi_assert(page != NULL);
   mi_segment_t* segment = _mi_page_segment(page);
   mi_assert_expensive(mi_segment_is_valid(segment));
-  segment->abandoned++;
-  mi_page_set_thread_id(page, 0);
+  segment->abandoned++;
   _mi_stat_increase(&tld->stats->pages_abandoned, 1);
   mi_assert_internal(segment->abandoned <= segment->used);
   if (segment->used == segment->abandoned) {
@@ -626,7 +622,6 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
       }
       else {
         // otherwise reclaim it
-        mi_page_set_thread_id(page,segment->thread_id);
         _mi_page_reclaim(heap,page);
       }
     }
@@ -656,8 +651,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
 static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(mi_segment_has_free(segment));
   mi_page_t* page = mi_segment_find_free(segment, tld->stats);
-  page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
+  page->segment_in_use = true;
   segment->used++;
   mi_assert_internal(segment->used <= segment->capacity);
   if (segment->used == segment->capacity) {
@@ -697,7 +691,6 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
   segment->used = 1;
   mi_page_t* page = &segment->pages[0];
   page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
   return page;
 }
@@ -709,7 +702,6 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld,
   segment->used = 1;
   mi_page_t* page = &segment->pages[0];
   page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
   return page;
 }
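The new `mi_page_flags_t` overlays two byte-sized booleans with one 16-bit value so `mi_free` can test "not full and no aligned blocks" with a single compare against zero. A compilable sketch of the trick (illustrative names, not the library's header):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef union flags_u {
      uint16_t value;          /* both flags viewed as one word */
      struct {
        bool in_full;          /* one byte */
        bool has_aligned;      /* one byte */
      };
    } flags_t;

    int main(void) {
      flags_t f = { 0 };
      assert(f.value == 0);    /* fast path: neither flag set, one compare */
      f.has_aligned = true;
      assert(f.value != 0);    /* any set flag makes the word non-zero */
      f.has_aligned = false;
      f.in_full = true;
      assert(f.value != 0);
      return 0;
    }

Reading `value` after writing the members is the usual C union type-pun; since `bool` stores 0 or 1 in one byte and the union is zero-initialized, `value == 0` holds exactly when both flags are false.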
From 25ea9cf142548a65c5109798706bcc872886d93b Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 21:38:45 -0700
Subject: [PATCH 08/40] on windows use 4TiB area for aligned allocation

---
 src/os.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/os.c b/src/os.c
index 7afe447e..e7ed57b5 100644
--- a/src/os.c
+++ b/src/os.c
@@ -184,6 +184,18 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
 #ifdef _WIN32
 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
+#if (MI_INTPTR_SIZE >= 8)
+  // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations
+  static volatile intptr_t aligned_base = ((intptr_t)4 << 40); // starting at 4TiB
+  if (addr == NULL && try_alignment > 0 &&
+      try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE) == 0)
+  {
+    intptr_t hint = mi_atomic_add(&aligned_base, size) - size;
+    if (hint%try_alignment == 0) {
+      return VirtualAlloc((void*)hint, size, flags, PAGE_READWRITE);
+    }
+  }
+#endif
 #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
   if (try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
     // on modern Windows try use VirtualAlloc2 for aligned allocation
From 15552eba790e7a7e6d8477236c7c51fdb9288ee0 Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 13:44:43 -0700
Subject: [PATCH 09/40] ensure volatile declaration for abandoned_next field

---
 include/mimalloc-types.h |  2 +-
 src/page-queue.c         |  1 +
 src/segment.c            | 16 +++++++++-------
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index c0778f87..dd1f05e3 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -204,7 +204,7 @@ typedef enum mi_page_kind_e {
 typedef struct mi_segment_s {
   struct mi_segment_s* next;
   struct mi_segment_s* prev;
-  struct mi_segment_s* abandoned_next;
+  volatile struct mi_segment_s* abandoned_next;
   size_t          abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
   size_t          used;      // count of pages in use (`used <= capacity`)
   size_t          capacity;  // count of available pages (`#free + used`)
diff --git a/src/page-queue.c b/src/page-queue.c
index e476403b..859b1d57 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -130,6 +130,7 @@ extern inline uint8_t _mi_bin(size_t size) {
     // - adjust with 3 because we use do not round the first 8 sizes
     //   which each get an exact bin
     bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
+    mi_assert_internal(bin < MI_BIN_HUGE);
   }
   mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE);
   return bin;
diff --git a/src/segment.c b/src/segment.c
index d5a2288a..6379b24a 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -558,13 +558,15 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_segment_remove_from_free_queue(segment,tld);
   mi_assert_internal(segment->next == NULL && segment->prev == NULL);
   // all pages in the segment are abandoned; add it to the abandoned list
-  segment->thread_id = 0;
-  do {
-    segment->abandoned_next = (mi_segment_t*)abandoned;
-  } while (!mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment, segment->abandoned_next));
-  mi_atomic_increment(&abandoned_count);
-  _mi_stat_increase(&tld->stats->segments_abandoned,1);
+  _mi_stat_increase(&tld->stats->segments_abandoned, 1);
   mi_segments_track_size(-((long)segment->segment_size), tld);
+  segment->thread_id = 0;
+  mi_segment_t* next;
+  do {
+    next = (mi_segment_t*)abandoned;
+    mi_atomic_write_ptr((volatile void**)&segment->abandoned_next, next);
+  } while (!mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment, next));
+  mi_atomic_increment(&abandoned_count);
 }

 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
@@ -598,7 +600,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
     mi_segment_t* segment;
     do {
      segment = (mi_segment_t*)abandoned;
-    } while(segment != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment->abandoned_next, segment));
+    } while(segment != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, (mi_segment_t*)segment->abandoned_next, segment));
    if (segment==NULL) break; // stop early if no more segments available

    // got it.
From 6c6fcad242ebedba6ee07cff2d255457eb811bb8 Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 14:08:00 -0700
Subject: [PATCH 10/40] remove threadid from pages and keep page flags separate
 (cherry picked)

---
 include/mimalloc-internal.h | 25 +++++--------------------
 include/mimalloc-types.h    | 30 ++++++++++++++++--------------
 src/alloc.c                 | 10 +++++-----
 src/init.c                  |  9 +++++----
 src/page.c                  |  1 -
 src/segment.c               | 18 +++++------------
 6 files changed, 36 insertions(+), 57 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 6455d57e..d886bcec 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -315,39 +315,24 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size)
 }

+
 //-----------------------------------------------------------
 // Page flags
 //-----------------------------------------------------------
-static inline uintptr_t mi_page_thread_id(const mi_page_t* page) {
-  return (page->flags & ~MI_PAGE_FLAGS_MASK);
-}
-
-static inline void mi_page_init_flags(mi_page_t* page, uintptr_t thread_id) {
-  mi_assert_internal((thread_id & MI_PAGE_FLAGS_MASK) == 0);
-  page->flags = thread_id;
-}
-
-static inline void mi_page_set_thread_id(mi_page_t* page, uintptr_t thread_id) {
-  mi_assert_internal((thread_id & MI_PAGE_FLAGS_MASK) == 0);
-  page->flags = thread_id | (page->flags & MI_PAGE_FLAGS_MASK);
-}
-
 static inline bool mi_page_is_in_full(const mi_page_t* page) {
-  return ((page->flags & 0x01) != 0);
+  return page->flags.in_full;
 }

 static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
-  if (in_full) page->flags |= 0x01;
-  else page->flags &= ~0x01;
+  page->flags.in_full = in_full;
 }

 static inline bool mi_page_has_aligned(const mi_page_t* page) {
-  return ((page->flags & 0x02) != 0);
+  return page->flags.has_aligned;
 }

 static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
-  if (has_aligned) page->flags |= 0x02;
-  else page->flags &= ~0x02;
+  page->flags.has_aligned = has_aligned;
 }
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index dd1f05e3..c20b663a 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -124,12 +124,15 @@ typedef enum mi_delayed_e {
 } mi_delayed_t;

-// Use the bottom 2 bits for the `in_full` and `has_aligned` flags
-// and the rest for the threadid (we assume tid's never use those lower 2 bits).
-// This allows a single test in `mi_free` to check for unlikely cases
-// (namely, non-local free, aligned free, or freeing in a full page)
-#define MI_PAGE_FLAGS_MASK  ((uintptr_t)0x03)
-typedef uintptr_t mi_page_flags_t;
+// The `in_full` and `has_aligned` page flags are put in a union to efficiently
+// test if both are false (`value == 0`) in the `mi_free` routine.
+typedef union mi_page_flags_u {
+  uint16_t value;
+  struct {
+    bool in_full;
+    bool has_aligned;
+  };
+} mi_page_flags_t;

 // Thread free list.
 // We use the bottom 2 bits of the pointer for mi_delayed_t flags
@@ -163,12 +166,12 @@ typedef struct mi_page_s {
   // layout like this to optimize access in `mi_malloc` and `mi_free`
   uint16_t              capacity;  // number of blocks committed
   uint16_t              reserved;  // number of blocks reserved in memory
-  // 16 bits padding
+  mi_page_flags_t       flags;     // `in_full` and `has_aligned` flags (16 bits)
+
   mi_block_t*           free;      // list of available free blocks (`malloc` allocates from this list)
   #if MI_SECURE
   uintptr_t             cookie;    // random cookie to encode the free lists
   #endif
-  mi_page_flags_t       flags;     // threadid:62 | has_aligned:1 | in_full:1
   size_t                used;      // number of blocks in use (including blocks in `local_free` and `thread_free`)

   mi_block_t*           local_free; // list of deferred free blocks by this thread (migrates to `free`)
@@ -181,12 +184,11 @@ typedef struct mi_page_s {
   struct mi_page_s*     next;      // next page owned by this thread with the same `block_size`
   struct mi_page_s*     prev;      // previous page owned by this thread with the same `block_size`

-// improve page index calculation
-#if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
-  void*                 padding[1]; // 12 words on 64-bit
-#elif MI_INTPTR_SIZE==4
-  // void*              padding[1]; // 12 words on 32-bit
-#endif
+  // improve page index calculation
+  // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds one word
+  #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0)
+  void*                 padding[1]; // 12 words on 64-bit in secure mode, 12 words on 32-bit plain
+  #endif
 } mi_page_t;
diff --git a/src/alloc.c b/src/alloc.c
index 9be2ef40..b7881ea5 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -225,19 +225,19 @@ void mi_free(void* p) mi_attr_noexcept
   }
 #endif

+  const uintptr_t tid = _mi_thread_id();
   mi_page_t* const page = _mi_segment_page_of(segment, p);

 #if (MI_STAT>1)
   mi_heap_t* heap = mi_heap_get_default();
-  mi_heap_stat_decrease( heap, malloc, mi_usable_size(p));
+  mi_heap_stat_decrease(heap, malloc, mi_usable_size(p));
   if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) {
-    mi_heap_stat_decrease( heap, normal[_mi_bin(page->block_size)], 1);
+    mi_heap_stat_decrease(heap, normal[_mi_bin(page->block_size)], 1);
   }
   // huge page stat is accounted for in `_mi_page_retire`
 #endif

-  const uintptr_t tid = _mi_thread_id();
-  if (mi_likely(tid == page->flags)) { // if equal, the thread id matches and it is not a full page, nor has aligned blocks
+  if (mi_likely(tid == segment->thread_id && page->flags.value == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
     // local, and not full or aligned
     mi_block_t* block = (mi_block_t*)p;
     mi_block_set_next(page, block, page->local_free);
@@ -247,7 +247,7 @@ void mi_free(void* p) mi_attr_noexcept
   }
   else {
     // non-local, aligned blocks, or a full page; use the more generic path
-    mi_free_generic(segment, page, tid == mi_page_thread_id(page), p);
+    mi_free_generic(segment, page, tid == segment->thread_id, p);
   }
 }
diff --git a/src/init.c b/src/init.c
index ec64def8..76e586f2 100644
--- a/src/init.c
+++ b/src/init.c
@@ -13,15 +13,16 @@ terms of the MIT license. A copy of the license can be found in the file
 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
   0, false, false, false, 0, 0,
+  { 0 },
   NULL,      // free
   #if MI_SECURE
   0,
   #endif
-  0, 0,      // flags, used
+  0,         // used
   NULL, 0, 0,
   0, NULL, NULL, NULL
-  #if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
-  , { NULL }
+  #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0)
+  , { NULL } // padding
   #endif
 };
@@ -350,7 +351,7 @@ void mi_thread_init(void) mi_attr_noexcept
     pthread_setspecific(mi_pthread_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_pthread_done` is called
   #endif

-  #if (MI_DEBUG>0) // not in release mode as that leads to crashes on Windows dynamic override
+  #if (MI_DEBUG>0) && !defined(NDEBUG)  // not in release mode as that leads to crashes on Windows dynamic override
   _mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
   #endif
 }
diff --git a/src/page.c b/src/page.c
index 549ced38..a7b4a760 100644
--- a/src/page.c
+++ b/src/page.c
@@ -75,7 +75,6 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
   mi_segment_t* segment = _mi_page_segment(page);
   uint8_t* start = _mi_page_start(segment,page,NULL);
   mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL));
-  mi_assert_internal(segment->thread_id==0 || segment->thread_id == mi_page_thread_id(page));
   //mi_assert_internal(start + page->capacity*page->block_size == page->top);
   mi_assert_internal(mi_page_list_is_valid(page,page->free));
diff --git a/src/segment.c b/src/segment.c
index 6379b24a..b1a5221c 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -549,14 +549,11 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(segment->used > 0);
   mi_assert_internal(segment->abandoned_next == NULL);
   mi_assert_expensive(mi_segment_is_valid(segment));
-#if MI_DEBUG>1
-  for (size_t i = 0; i < segment->capacity; i++) {
-    mi_assert_internal(!segment->pages[i].segment_in_use || mi_page_thread_id(&segment->pages[i]) == 0);
-  }
-#endif
+
   // remove the segment from the free page queue if needed
   mi_segment_remove_from_free_queue(segment,tld);
   mi_assert_internal(segment->next == NULL && segment->prev == NULL);
+
   // all pages in the segment are abandoned; add it to the abandoned list
   _mi_stat_increase(&tld->stats->segments_abandoned, 1);
   mi_segments_track_size(-((long)segment->segment_size), tld);
@@ -570,11 +567,10 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
 }

 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
-  mi_assert(page != NULL && mi_page_thread_id(page) != 0);
+  mi_assert(page != NULL);
   mi_segment_t* segment = _mi_page_segment(page);
   mi_assert_expensive(mi_segment_is_valid(segment));
-  segment->abandoned++;
-  mi_page_set_thread_id(page, 0);
+  segment->abandoned++;
   _mi_stat_increase(&tld->stats->pages_abandoned, 1);
   mi_assert_internal(segment->abandoned <= segment->used);
   if (segment->used == segment->abandoned) {
@@ -626,7 +622,6 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
       }
       else {
         // otherwise reclaim it
-        mi_page_set_thread_id(page,segment->thread_id);
         _mi_page_reclaim(heap,page);
       }
     }
@@ -656,8 +651,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
 static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(mi_segment_has_free(segment));
   mi_page_t* page = mi_segment_find_free(segment, tld->stats);
-  page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
+  page->segment_in_use = true;
   segment->used++;
   mi_assert_internal(segment->used <= segment->capacity);
   if (segment->used == segment->capacity) {
@@ -697,7 +691,6 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
   segment->used = 1;
   mi_page_t* page = &segment->pages[0];
   page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
   return page;
 }
@@ -709,7 +702,6 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld,
   segment->used = 1;
   mi_page_t* page = &segment->pages[0];
   page->segment_in_use = true;
-  mi_page_init_flags(page,segment->thread_id);
   return page;
 }
From d04d379f66080c9a1e0618e2d834b455b7c98bd1 Mon Sep 17 00:00:00 2001
From: daan
Date: Fri, 23 Aug 2019 21:44:07 -0700
Subject: [PATCH 11/40] fix merge conflicts

---
 src/os.c | 16 +++------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/src/os.c b/src/os.c
index 403f9766..a1b6cdf3 100644
--- a/src/os.c
+++ b/src/os.c
@@ -196,18 +196,6 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
 #ifdef _WIN32
 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
-#if (MI_INTPTR_SIZE >= 8)
-  // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations
-  static volatile intptr_t aligned_base = ((intptr_t)4 << 40); // starting at 4TiB
-  if (addr == NULL && try_alignment > 0 &&
-      try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE) == 0)
-  {
-    intptr_t hint = mi_atomic_add(&aligned_base, size) - size;
-    if (hint%try_alignment == 0) {
-      return VirtualAlloc((void*)hint, size, flags, PAGE_READWRITE);
-    }
-  }
-#endif
 #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
   // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
   if ((size % (uintptr_t)1 << 30) == 0 /* 1GiB multiple */
@@ -236,7 +224,9 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
 #if (MI_INTPTR_SIZE >= 8)
   // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations
   static volatile intptr_t aligned_base = ((intptr_t)4 << 40); // starting at 4TiB
-  if (addr == NULL && try_alignment > 0 && try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE) == 0) {
+  if (addr == NULL && try_alignment > 0 &&
+      try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE) == 0)
+  {
     intptr_t hint = mi_atomic_add(&aligned_base, size) - size;
     if (hint%try_alignment == 0) {
       return VirtualAlloc((void*)hint, size, flags, PAGE_READWRITE);

From 99e071cf2c3c3557c2e755911f69b7372c66fe09 Mon Sep 17 00:00:00 2001
From: Jim Huang
Date: Sat, 24 Aug 2019 21:24:56 +0800
Subject: [PATCH 12/40] Link with -lrt for older glibc

Quoted from the Linux Programmer's Manual (2017-09-15):

    #include <time.h>

    int clock_gettime(clockid_t clk_id, struct timespec *tp);

    Link with -lrt (only for glibc versions before 2.17).

This patch adds additional checks for librt availability and appends it
to target_link_libraries accordingly. librt is absent on macOS.

Fixed #139
---
 CMakeLists.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c9de8618..d44b3408 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -108,6 +108,10 @@ if(WIN32)
   list(APPEND mi_libraries psapi shell32 user32)
 else()
   list(APPEND mi_libraries pthread)
+  find_library(LIBRT rt)
+  if(LIBRT)
+    list(APPEND mi_libraries ${LIBRT})
+  endif()
 endif()

 # -----------------------------------------------------------------------------
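For reference, the call that motivates the check: `clock_gettime` lived in librt before glibc 2.17, so a program like the one below needs `-lrt` on those systems and nothing extra on newer glibc or on macOS:

    #include <stdio.h>
    #include <time.h>

    int main(void) {
      struct timespec ts;
      if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {  /* the librt symbol */
        printf("monotonic: %ld.%09ld\n", (long)ts.tv_sec, (long)ts.tv_nsec);
      }
      return 0;
    }

The `find_library`/`if(LIBRT)` pattern links librt only where it actually exists, which keeps the same CMakeLists.txt working on both old glibc and macOS.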
From 23812cc0ac0e37fb2c123f1d391aecdfc372fbfc Mon Sep 17 00:00:00 2001
From: daan
Date: Sat, 24 Aug 2019 15:45:14 -0700
Subject: [PATCH 13/40] do not keep a queue of huge pages and free them
 directly

---
 src/alloc.c      | 15 +++++++++++++++
 src/page-queue.c |  1 +
 src/page.c       | 30 ++++++++++++++++------------
 src/segment.c    |  5 +++--
 4 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/src/alloc.c b/src/alloc.c
index b7881ea5..76e093e7 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -115,6 +115,21 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block)
   mi_thread_free_t tfreex;
   bool use_delayed;

+  mi_segment_t* segment = _mi_page_segment(page);
+  if (segment->page_kind==MI_PAGE_HUGE) {
+    // huge page segments are always abandoned and can be freed immediately
+    mi_assert_internal(segment->thread_id==0);
+    mi_assert_internal(segment->abandoned_next==NULL);
+    // claim it and free
+    mi_block_set_next(page, block, page->free);
+    page->free = block;
+    page->used--;
+    mi_heap_t* heap = mi_get_default_heap();
+    segment->thread_id = heap->thread_id;
+    _mi_segment_page_free(page,true,&heap->tld->segments);
+    return;
+  }
+
   do {
     tfree = page->thread_free;
     use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE ||
diff --git a/src/page-queue.c b/src/page-queue.c
index 859b1d57..d613095f 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -268,6 +268,7 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
 static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
   mi_assert_internal(page->heap == NULL);
   mi_assert_internal(!mi_page_queue_contains(queue, page));
+  mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
   mi_assert_internal(page->block_size == queue->block_size ||
                      (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) ||
                      (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
diff --git a/src/page.c b/src/page.c
index a7b4a760..f7e0ce2c 100644
--- a/src/page.c
+++ b/src/page.c
@@ -98,11 +98,13 @@ bool _mi_page_is_valid(mi_page_t* page) {
 #endif
   if (page->heap!=NULL) {
     mi_segment_t* segment = _mi_page_segment(page);
-    mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id);
-    mi_page_queue_t* pq = mi_page_queue_of(page);
-    mi_assert_internal(mi_page_queue_contains(pq, page));
-    mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page));
-    mi_assert_internal(mi_heap_contains_queue(page->heap,pq));
+    mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id || segment->thread_id==0);
+    if (segment->page_kind != MI_PAGE_HUGE) {
+      mi_page_queue_t* pq = mi_page_queue_of(page);
+      mi_assert_internal(mi_page_queue_contains(pq, page));
+      mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page));
+      mi_assert_internal(mi_heap_contains_queue(page->heap,pq));
+    }
   }
   return true;
 }
@@ -204,6 +206,7 @@ void _mi_page_free_collect(mi_page_t* page) {
 void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
   mi_assert_expensive(mi_page_is_valid_init(page));
   mi_assert_internal(page->heap == NULL);
+  mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
   _mi_page_free_collect(page);
   mi_page_queue_t* pq = mi_page_queue(heap, page->block_size);
   mi_page_queue_push(heap, pq, page);
@@ -212,12 +215,13 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {

 // allocate a fresh page from a segment
 static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) {
-  mi_assert_internal(mi_heap_contains_queue(heap, pq));
+  mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq));
   mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os);
   if (page == NULL) return NULL;
+  mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
   mi_page_init(heap, page, block_size, &heap->tld->stats);
   _mi_stat_increase( &heap->tld->stats.pages, 1);
-  mi_page_queue_push(heap, pq, page);
+  if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL
   mi_assert_expensive(_mi_page_is_valid(page));
   return page;
 }
@@ -699,13 +703,15 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept {
 // A huge page is allocated directly without being in a queue
 static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
   size_t block_size = _mi_wsize_from_size(size) * sizeof(uintptr_t);
-  mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE);
-  mi_page_queue_t* pq = mi_page_queue(heap,block_size);
-  mi_assert_internal(mi_page_queue_is_huge(pq));
-  mi_page_t* page = mi_page_fresh_alloc(heap,pq,block_size);
+  mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE);
+  mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size);
   if (page != NULL) {
     mi_assert_internal(mi_page_immediate_available(page));
     mi_assert_internal(page->block_size == block_size);
+    mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE);
+    mi_assert_internal(_mi_page_segment(page)->used==1);
+    mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
+    page->heap = NULL;
     if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) {
       _mi_stat_increase(&heap->tld->stats.giant, block_size);
       _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1);
@@ -714,7 +720,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
       _mi_stat_increase(&heap->tld->stats.huge, block_size);
       _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1);
     }
-  }
+  }
   return page;
 }
diff --git a/src/segment.c b/src/segment.c
index b1a5221c..3be703cf 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -134,7 +134,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment) {
     if (!segment->pages[i].segment_in_use) nfree++;
   }
   mi_assert_internal(nfree + segment->used == segment->capacity);
-  mi_assert_internal(segment->thread_id == _mi_thread_id()); // or 0
+  mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0));
   mi_assert_internal(segment->page_kind == MI_PAGE_HUGE ||
                      (mi_segment_pagesize(segment) * segment->capacity == segment->segment_size));
   return true;
@@ -700,6 +700,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld,
   if (segment == NULL) return NULL;
   mi_assert_internal(segment->segment_size - segment->segment_info_size >= size);
   segment->used = 1;
+  segment->thread_id = 0; // huge pages are immediately abandoned
   mi_page_t* page = &segment->pages[0];
   page->segment_in_use = true;
   return page;
@@ -721,7 +722,7 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
   else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) {
     page = mi_segment_medium_page_alloc(tld, os_tld);
   }
-  else if (block_size < MI_LARGE_OBJ_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) {
+  else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) {
     page = mi_segment_large_page_alloc(tld, os_tld);
   }
   else {
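The ownership convention behind patch 13: a huge segment is created with `thread_id == 0` (abandoned), so whichever thread ends up freeing its single block may claim the segment and release it immediately instead of deferring to an owner thread. A simplified sketch of that hand-off (illustrative fields; a production version would claim the segment with an atomic compare-and-swap rather than a plain store):

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct seg_s {
      uintptr_t owner_tid;   /* 0 means: owned by no thread (abandoned) */
      bool      is_huge;     /* segment holds exactly one huge block    */
    } seg_t;

    static void free_huge_block(seg_t* seg, uintptr_t my_tid) {
      if (seg->is_huge && seg->owner_tid == 0) {
        seg->owner_tid = my_tid;   /* claim the abandoned segment ...          */
        /* ... and unmap the whole segment here (munmap / VirtualFree) */
        return;
      }
      /* otherwise: the normal local or cross-thread free path */
    }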
 mi_page_t* page = &segment->pages[0];
 page->segment_in_use = true;
 return page;
@@ -721,7 +722,7 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_
 else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) {
 page = mi_segment_medium_page_alloc(tld, os_tld);
 }
- else if (block_size < MI_LARGE_OBJ_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) {
+ else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) {
 page = mi_segment_large_page_alloc(tld, os_tld);
 }
 else {

From a431d80fc38639fa4186baa0a0d13063fa7dc994 Mon Sep 17 00:00:00 2001
From: daan
Date: Sun, 25 Aug 2019 10:01:11 -0700
Subject: [PATCH 14/40] better object sizes for large objects

---
 include/mimalloc-types.h | 12 +++++++-----
 src/page.c               |  5 ++++-
 src/segment.c            |  8 ++------
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index c20b663a..db39b9c4 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -91,11 +91,13 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE)
 #define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE)

-#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4)
-#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb on 64-bit
-#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2Mb on 64-bit
-#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX>>MI_INTPTR_SHIFT)
-#define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)
+// The max object sizes are checked so that no more than 12.5% is wasted internally over the page sizes.
+// (Except for large pages since huge objects are allocated in 4MiB chunks)
+#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 16kb
+#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb
+#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2mb
+#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
+#define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)

 // Minimal alignment necessary. On most platforms 16 bytes are needed
 // due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
diff --git a/src/page.c b/src/page.c
index f7e0ce2c..cb891d18 100644
--- a/src/page.c
+++ b/src/page.c
@@ -700,7 +700,10 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept {
 General allocation
 ----------------------------------------------------------- */

-// A huge page is allocated directly without being in a queue
+// A huge page is allocated directly without being in a queue.
+// Because huge pages contain just one block, and the segment contains
+// just that page, we always treat them as abandoned and any thread
+// that frees the block can free the whole page and segment directly.
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { size_t block_size = _mi_wsize_from_size(size) * sizeof(uintptr_t); mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); diff --git a/src/segment.c b/src/segment.c index 3be703cf..85a63ca4 100644 --- a/src/segment.c +++ b/src/segment.c @@ -709,17 +709,13 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld /* ----------------------------------------------------------- Page allocation and free ----------------------------------------------------------- */ -static bool mi_is_good_fit(size_t bsize, size_t size) { - // good fit if no more than 25% wasted - return (bsize > 0 && size > 0 && bsize < size && (size - (size % bsize)) < (size/4)); -} mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if (block_size <= MI_SMALL_OBJ_SIZE_MAX || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) { + if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { page = mi_segment_small_page_alloc(tld,os_tld); } - else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) { + else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { page = mi_segment_medium_page_alloc(tld, os_tld); } else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { From 7972b64a822dba03763d4fb9c28fa43765984da7 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 25 Aug 2019 11:18:16 -0700 Subject: [PATCH 15/40] resolve race on option initialization --- src/options.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index bc658ca9..8880c317 100644 --- a/src/options.c +++ b/src/options.c @@ -18,6 +18,9 @@ int mi_version(void) mi_attr_noexcept { // -------------------------------------------------------- // Options +// These can be accessed by multiple threads and may be +// concurrently initialized, but an initializing data race +// is ok since they resolve to the same value. // -------------------------------------------------------- typedef enum mi_init_e { UNINIT, // not yet initialized @@ -180,7 +183,6 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) { } static void mi_option_init(mi_option_desc_t* desc) { - desc->init = DEFAULTED; // Read option value from the environment char buf[32]; mi_strlcpy(buf, "mimalloc_", sizeof(buf)); @@ -218,7 +220,12 @@ static void mi_option_init(mi_option_desc_t* desc) { } else { _mi_warning_message("environment option mimalloc_%s has an invalid value: %s\n", desc->name, buf); + desc->init = DEFAULTED; } } } + else { + desc->init = DEFAULTED; + } + mi_assert_internal(desc->init != UNINIT); } From 038e8fd7d68f8a328c0e728b102dccb00df2da0f Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 25 Aug 2019 12:10:18 -0700 Subject: [PATCH 16/40] reduce retire size --- src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index cb891d18..54897af5 100644 --- a/src/page.c +++ b/src/page.c @@ -393,7 +393,7 @@ void _mi_page_retire(mi_page_t* page) { // is the only page left with free blocks. It is not clear // how to check this efficiently though... for now we just check // if its neighbours are almost fully used. 
- if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) {
+ if (mi_likely(page->block_size <= (MI_SMALL_SIZE_MAX/4))) {
 if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) {
 _mi_stat_counter_increase(&_mi_stats_main.page_no_retire,1);
 return; // don't retire after all

From b86c851cca62fc03233295178c2119d43e7ce486 Mon Sep 17 00:00:00 2001
From: daan
Date: Sun, 25 Aug 2019 12:29:01 -0700
Subject: [PATCH 17/40] put segment cache behind an option and disable by default

---
 include/mimalloc.h |  1 +
 src/options.c      |  1 +
 src/segment.c      | 15 ++++++++-------
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/include/mimalloc.h b/include/mimalloc.h
index 9f27e463..4f13bc1f 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -228,6 +228,7 @@ typedef enum mi_option_e {
 mi_option_eager_commit,
 mi_option_eager_region_commit,
 mi_option_large_os_pages, // implies eager commit
+ mi_option_segment_cache,
 mi_option_page_reset,
 mi_option_cache_reset,
 mi_option_reset_decommits,
diff --git a/src/options.c b/src/options.c
index b48e45ef..b30ff1c6 100644
--- a/src/options.c
+++ b/src/options.c
@@ -65,6 +65,7 @@ static mi_option_desc_t options[_mi_option_last] =
 { 1, UNINIT, MI_OPTION(eager_region_commit) },
 #endif
 { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
+ { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread
 { 0, UNINIT, MI_OPTION(page_reset) },
 { 0, UNINIT, MI_OPTION(cache_reset) },
 { 0, UNINIT, MI_OPTION(reset_decommits) } // note: cannot enable this if secure is on
diff --git a/src/segment.c b/src/segment.c
index 85a63ca4..18c06fbc 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -236,8 +236,6 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
 // The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use,
-// and no more than 4.
-#define MI_SEGMENT_CACHE_MAX (4)
 #define MI_SEGMENT_CACHE_FRACTION (8)

 // note: returned segment may be partially reset
@@ -253,15 +251,18 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t
 return segment;
 }

-static bool mi_segment_cache_full(mi_segments_tld_t* tld) {
- if (tld->cache_count < MI_SEGMENT_CACHE_MAX
- && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))
- ) { // always allow 1 element cache
+static bool mi_segment_cache_full(mi_segments_tld_t* tld)
+{
+ if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread
+ size_t max_cache = mi_option_get(mi_option_segment_cache);
+ if (tld->cache_count < max_cache
+ && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache
+ ) {
 return false;
 }
 // take the opportunity to reduce the segment cache if it is too large (now)
 // TODO: this never happens as we check against peak usage, should we use current usage instead?
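Since the new option goes through the standard initialization path, it can be inspected like any other mimalloc option; a minimal check, assuming <mimalloc.h> is on the include path and the usual MIMALLOC_<OPTION> environment naming that mi_option_init reads:

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // run as e.g.: MIMALLOC_SEGMENT_CACHE=4 ./a.out
  // the new default of 0 means freed segments go straight back to the OS
  printf("segment cache: %ld segments per thread\n",
         mi_option_get(mi_option_segment_cache));
  return 0;
}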
- while (tld->cache_count > MI_SEGMENT_CACHE_MAX ) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { + while (tld->cache_count > max_cache) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { mi_segment_t* segment = mi_segment_cache_pop(0,tld); mi_assert_internal(segment != NULL); if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld); From 6e360d34eea79e85b21352014a88c53c104c211d Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 25 Aug 2019 13:15:26 -0700 Subject: [PATCH 18/40] fix 1GB huge page flag on Linux --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index a1b6cdf3..3a9bd30c 100644 --- a/src/os.c +++ b/src/os.c @@ -353,7 +353,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro lflags |= MAP_HUGETLB; #endif #ifdef MAP_HUGE_1GB - if ((size % (uintptr_t)1 << 20) == 0) { + if ((size % ((uintptr_t)1 << 30)) == 0) { lflags |= MAP_HUGE_1GB; } else From e8664001f76981079191b22aff6dbdada135e6fa Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 25 Aug 2019 22:59:12 -0700 Subject: [PATCH 19/40] Use standard _Atomic declarations and clean up atomic operations --- include/mimalloc-atomic.h | 186 +++++++++++++++++++------------------- include/mimalloc-types.h | 11 ++- src/alloc.c | 6 +- src/memory.c | 54 +++++------ src/options.c | 2 +- src/os.c | 18 ++-- src/page.c | 13 +-- src/segment.c | 12 +-- src/stats.c | 22 ++--- 9 files changed, 165 insertions(+), 159 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index d504634c..739d0512 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -9,63 +9,98 @@ terms of the MIT license. A copy of the license can be found in the file #define MIMALLOC_ATOMIC_H // ------------------------------------------------------ -// Atomics +// Atomics +// We need to be portable between C, C++, and MSVC. // ------------------------------------------------------ -// Atomically increment a value; returns the incremented result. -static inline uintptr_t mi_atomic_increment(volatile uintptr_t* p); +#if defined(_MSC_VER) +#define _Atomic(tp) tp +#define ATOMIC_VAR_INIT(x) x +#elif defined(__cplusplus) +#include +#define _Atomic(tp) std::atomic +#else +#include +#endif -// Atomically increment a value; returns the incremented result. -static inline uint32_t mi_atomic_increment32(volatile uint32_t* p); +#define mi_atomic_cast(tp,x) (volatile _Atomic(tp)*)(x) -// Atomically decrement a value; returns the decremented result. -static inline uintptr_t mi_atomic_decrement(volatile uintptr_t* p); +// ------------------------------------------------------ +// Atomic operations specialized for mimalloc +// ------------------------------------------------------ -// Atomically add a 64-bit value; returns the added result. -static inline int64_t mi_atomic_add(volatile int64_t* p, int64_t add); +// Atomically add a 64-bit value; returns the previous value. +// Note: not using _Atomic(int64_t) as it is only used for stats. +static inline int64_t mi_atomic_add64(volatile int64_t* p, int64_t add); -// Atomically subtract a value; returns the subtracted result. -static inline uintptr_t mi_atomic_subtract(volatile uintptr_t* p, uintptr_t sub); +// Atomically add a value; returns the previous value. +static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add); -// Atomically subtract a value; returns the subtracted result. 
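As an aside, a small sketch of how the shim above is meant to be used: the same declaration compiles as C11 _Atomic, C++ std::atomic, or a plain MSVC integer, and note that mi_atomic_cas_weak takes (desired, expected), the reverse argument order of C11 atomic_compare_exchange:

#include <stdint.h>
// assumes the mimalloc-atomic.h declarations above are in scope

static volatile _Atomic(uintptr_t) flags; // = 0

static void set_flag(uintptr_t bit) {
  uintptr_t old, desired;
  do { // a weak CAS may fail spuriously, so retry in a loop
    old = mi_atomic_read_relaxed(&flags);
    desired = old | bit;
  } while (!mi_atomic_cas_weak(&flags, desired, old));
}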
-static inline uint32_t mi_atomic_subtract32(volatile uint32_t* p, uint32_t sub); +// Atomically compare and exchange a value; returns `true` if successful. May fail spuriously. +// (Note: expected and desired are in opposite order from atomic_compare_exchange) +static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); // Atomically compare and exchange a value; returns `true` if successful. -static inline bool mi_atomic_compare_exchange32(volatile uint32_t* p, uint32_t exchange, uint32_t compare); - -// Atomically compare and exchange a value; returns `true` if successful. -static inline bool mi_atomic_compare_exchange(volatile uintptr_t* p, uintptr_t exchange, uintptr_t compare); +static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); // Atomically exchange a value. -static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exchange); +static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange); // Atomically read a value -static inline uintptr_t mi_atomic_read(volatile uintptr_t* p); +static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p); // Atomically write a value -static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x); - -// Atomically read a pointer -static inline void* mi_atomic_read_ptr(volatile void** p) { - return (void*)mi_atomic_read( (volatile uintptr_t*)p ); -} +static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x); +// Yield static inline void mi_atomic_yield(void); + +// Atomically add a value; returns the previous value. +static inline uintptr_t mi_atomic_addu(volatile _Atomic(uintptr_t)* p, uintptr_t add) { + return (uintptr_t)mi_atomic_add((volatile _Atomic(intptr_t)*)p, (intptr_t)add); +} +// Atomically subtract a value; returns the previous value. +static inline uintptr_t mi_atomic_subu(volatile _Atomic(uintptr_t)* p, uintptr_t sub) { + return (uintptr_t)mi_atomic_add((volatile _Atomic(intptr_t)*)p, -((intptr_t)sub)); +} + +// Atomically increment a value; returns the incremented result. +static inline uintptr_t mi_atomic_increment(volatile _Atomic(uintptr_t)* p) { + return mi_atomic_addu(p, 1); +} + +// Atomically decrement a value; returns the decremented result. +static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) { + return mi_atomic_subu(p, 1); +} + +// Atomically read a pointer +static inline void* mi_atomic_read_ptr_relaxed(volatile _Atomic(void*) const * p) { + return (void*)mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)p); +} + // Atomically write a pointer -static inline void mi_atomic_write_ptr(volatile void** p, void* x) { - mi_atomic_write((volatile uintptr_t*)p, (uintptr_t)x ); +static inline void mi_atomic_write_ptr(volatile _Atomic(void*)* p, void* x) { + mi_atomic_write((volatile _Atomic(uintptr_t)*)p, (uintptr_t)x ); +} + +// Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously. +// (Note: expected and desired are in opposite order from atomic_compare_exchange) +static inline bool mi_atomic_cas_ptr_weak(volatile _Atomic(void*)* p, void* desired, void* expected) { + return mi_atomic_cas_weak((volatile _Atomic(uintptr_t)*)p, (uintptr_t)desired, (uintptr_t)expected); } // Atomically compare and exchange a pointer; returns `true` if successful. 
-static inline bool mi_atomic_compare_exchange_ptr(volatile void** p, void* newp, void* compare) { - return mi_atomic_compare_exchange((volatile uintptr_t*)p, (uintptr_t)newp, (uintptr_t)compare); +// (Note: expected and desired are in opposite order from atomic_compare_exchange) +static inline bool mi_atomic_cas_ptr_strong(volatile _Atomic(void*)* p, void* desired, void* expected) { + return mi_atomic_cas_strong((volatile _Atomic(uintptr_t)*)p, (uintptr_t)desired, (uintptr_t)expected); } // Atomically exchange a pointer value. -static inline void* mi_atomic_exchange_ptr(volatile void** p, void* exchange) { - return (void*)mi_atomic_exchange((volatile uintptr_t*)p, (uintptr_t)exchange); +static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exchange) { + return (void*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)p, (uintptr_t)exchange); } @@ -73,49 +108,37 @@ static inline void* mi_atomic_exchange_ptr(volatile void** p, void* exchange) { #define WIN32_LEAN_AND_MEAN #include #include -#if (MI_INTPTR_SIZE==8) +#ifdef _WIN64 typedef LONG64 msc_intptr_t; #define RC64(f) f##64 #else typedef LONG msc_intptr_t; #define RC64(f) f #endif -static inline uintptr_t mi_atomic_increment(volatile uintptr_t* p) { - return (uintptr_t)RC64(_InterlockedIncrement)((volatile msc_intptr_t*)p); +static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { + return (intptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); } -static inline uint32_t mi_atomic_increment32(volatile uint32_t* p) { - return (uint32_t)_InterlockedIncrement((volatile LONG*)p); +static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { + return (expected == RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); } -static inline uintptr_t mi_atomic_decrement(volatile uintptr_t* p) { - return (uintptr_t)RC64(_InterlockedDecrement)((volatile msc_intptr_t*)p); +static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { + return mi_atomic_cas_strong(p,desired,expected); } -static inline uintptr_t mi_atomic_subtract(volatile uintptr_t* p, uintptr_t sub) { - return (uintptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub)) - sub; -} -static inline uint32_t mi_atomic_subtract32(volatile uint32_t* p, uint32_t sub) { - return (uint32_t)_InterlockedExchangeAdd((volatile LONG*)p, -((LONG)sub)) - sub; -} -static inline bool mi_atomic_compare_exchange32(volatile uint32_t* p, uint32_t exchange, uint32_t compare) { - return ((int32_t)compare == _InterlockedCompareExchange((volatile LONG*)p, (LONG)exchange, (LONG)compare)); -} -static inline bool mi_atomic_compare_exchange(volatile uintptr_t* p, uintptr_t exchange, uintptr_t compare) { - return (compare == RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange, (msc_intptr_t)compare)); -} -static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exchange) { +static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } -static inline uintptr_t mi_atomic_read(volatile uintptr_t* p) { +static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) { return *p; } -static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x) { - *p 
= x; +static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + mi_atomic_exchange(p,x); } static inline void mi_atomic_yield(void) { YieldProcessor(); } -static inline int64_t mi_atomic_add(volatile int64_t* p, int64_t add) { - #if (MI_INTPTR_SIZE==8) - return _InterlockedExchangeAdd64(p, add) + add; +static inline int64_t mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) { + #ifdef _WIN64 + return mi_atomic_add(p,add); #else int64_t current; int64_t sum; @@ -123,62 +146,43 @@ static inline int64_t mi_atomic_add(volatile int64_t* p, int64_t add) { current = *p; sum = current + add; } while (_InterlockedCompareExchange64(p, sum, current) != current); - return sum; + return current; #endif } #else #ifdef __cplusplus -#include #define MI_USING_STD using namespace std; -#define _Atomic(tp) atomic #else -#include #define MI_USING_STD #endif -static inline uintptr_t mi_atomic_increment(volatile uintptr_t* p) { +static inline int64_t mi_atomic_add64(volatile int64_t* p, int64_t add) { MI_USING_STD - return atomic_fetch_add_explicit((volatile atomic_uintptr_t*)p, (uintptr_t)1, memory_order_relaxed) + 1; + return atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed); } -static inline uint32_t mi_atomic_increment32(volatile uint32_t* p) { +static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { MI_USING_STD - return atomic_fetch_add_explicit((volatile _Atomic(uint32_t)*)p, (uint32_t)1, memory_order_relaxed) + 1; + return atomic_fetch_add_explicit(p, add, memory_order_relaxed); } -static inline uintptr_t mi_atomic_decrement(volatile uintptr_t* p) { +static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { MI_USING_STD - return atomic_fetch_sub_explicit((volatile atomic_uintptr_t*)p, (uintptr_t)1, memory_order_relaxed) - 1; + return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_relaxed); } -static inline int64_t mi_atomic_add(volatile int64_t* p, int64_t add) { +static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { MI_USING_STD - return atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed) + add; + return atomic_compare_exchange_strong_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_relaxed); } -static inline uintptr_t mi_atomic_subtract(volatile uintptr_t* p, uintptr_t sub) { +static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { MI_USING_STD - return atomic_fetch_sub_explicit((volatile atomic_uintptr_t*)p, sub, memory_order_relaxed) - sub; + return atomic_exchange_explicit(p, exchange, memory_order_acq_rel); } -static inline uint32_t mi_atomic_subtract32(volatile uint32_t* p, uint32_t sub) { +static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p) { MI_USING_STD - return atomic_fetch_sub_explicit((volatile _Atomic(uint32_t)*)p, sub, memory_order_relaxed) - sub; + return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_relaxed); } -static inline bool mi_atomic_compare_exchange32(volatile uint32_t* p, uint32_t exchange, uint32_t compare) { +static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { MI_USING_STD - return atomic_compare_exchange_weak_explicit((volatile _Atomic(uint32_t)*)p, &compare, exchange, memory_order_release, memory_order_relaxed); -} -static inline bool 
mi_atomic_compare_exchange(volatile uintptr_t* p, uintptr_t exchange, uintptr_t compare) { - MI_USING_STD - return atomic_compare_exchange_weak_explicit((volatile atomic_uintptr_t*)p, &compare, exchange, memory_order_release, memory_order_relaxed); -} -static inline uintptr_t mi_atomic_exchange(volatile uintptr_t* p, uintptr_t exchange) { - MI_USING_STD - return atomic_exchange_explicit((volatile atomic_uintptr_t*)p, exchange, memory_order_acquire); -} -static inline uintptr_t mi_atomic_read(volatile uintptr_t* p) { - MI_USING_STD - return atomic_load_explicit((volatile atomic_uintptr_t*)p, memory_order_relaxed); -} -static inline void mi_atomic_write(volatile uintptr_t* p, uintptr_t x) { - MI_USING_STD - return atomic_store_explicit((volatile atomic_uintptr_t*)p, x, memory_order_relaxed); + return atomic_store_explicit(p, x, memory_order_release); } #if defined(__cplusplus) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index db39b9c4..0b2334b8 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -10,6 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file #include // ptrdiff_t #include // uintptr_t, uint16_t, etc +#include // _Atomic // ------------------------------------------------------ // Variants @@ -177,8 +178,8 @@ typedef struct mi_page_s { size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) - volatile uintptr_t thread_freed; // at least this number of blocks are in `thread_free` - volatile mi_thread_free_t thread_free; // list of deferred free blocks freed by other threads + volatile _Atomic(uintptr_t) thread_freed; // at least this number of blocks are in `thread_free` + volatile _Atomic(mi_thread_free_t) thread_free; // list of deferred free blocks freed by other threads // less accessed info size_t block_size; // size available in each block (always `>0`) @@ -208,7 +209,7 @@ typedef enum mi_page_kind_e { typedef struct mi_segment_s { struct mi_segment_s* next; struct mi_segment_s* prev; - volatile struct mi_segment_s* abandoned_next; + volatile _Atomic(struct mi_segment_s*) abandoned_next; size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) size_t used; // count of pages in use (`used <= capacity`) size_t capacity; // count of available pages (`#free + used`) @@ -219,7 +220,7 @@ typedef struct mi_segment_s { // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). - volatile uintptr_t thread_id; // unique id of the thread owning this segment + volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment mi_page_kind_t page_kind; // kind of pages: small, large, or huge mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages } mi_segment_t; @@ -255,7 +256,7 @@ struct mi_heap_s { mi_tld_t* tld; mi_page_t* pages_free_direct[MI_SMALL_WSIZE_MAX + 2]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. 
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") - volatile mi_block_t* thread_delayed_free; + volatile _Atomic(mi_block_t*) thread_delayed_free; uintptr_t thread_id; // thread this heap belongs too uintptr_t cookie; uintptr_t random; // random number used for secure allocation diff --git a/src/alloc.c b/src/alloc.c index 76e093e7..97c5fcc4 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -144,7 +144,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_set_next(page, block, mi_tf_block(tfree)); tfreex = mi_tf_set_block(tfree,block); } - } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); + } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); if (mi_likely(!use_delayed)) { // increment the thread free count and return @@ -160,7 +160,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc do { dfree = (mi_block_t*)heap->thread_delayed_free; mi_block_set_nextx(heap->cookie,block,dfree); - } while (!mi_atomic_compare_exchange_ptr((volatile void**)&heap->thread_delayed_free, block, dfree)); + } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } // and reset the MI_DELAYED_FREEING flag @@ -168,7 +168,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc tfreex = tfree = page->thread_free; mi_assert_internal(mi_tf_delayed(tfree) == MI_NEVER_DELAYED_FREE || mi_tf_delayed(tfree) == MI_DELAYED_FREEING); if (mi_tf_delayed(tfree) != MI_NEVER_DELAYED_FREE) tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); - } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); + } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); } } diff --git a/src/memory.c b/src/memory.c index 26f87092..1ea6ee16 100644 --- a/src/memory.c +++ b/src/memory.c @@ -69,8 +69,8 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. typedef struct mem_region_s { - volatile uintptr_t map; // in-use bit per MI_SEGMENT_SIZE block - volatile void* start; // start of virtual memory area + volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block + volatile _Atomic(void*) start; // start of virtual memory area } mem_region_t; @@ -78,7 +78,7 @@ typedef struct mem_region_s { // TODO: in the future, maintain a map per NUMA node for numa aware allocation static mem_region_t regions[MI_REGION_MAX]; -static volatile size_t regions_count = 0; // allocated regions +static volatile _Atomic(uintptr_t) regions_count; // = 0; // allocated regions /* ---------------------------------------------------------------------------- @@ -106,9 +106,9 @@ static size_t mi_good_commit_size(size_t size) { // Return if a pointer points into a region reserved by us. 
 bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
 if (p==NULL) return false;
- size_t count = mi_atomic_read(&regions_count);
+ size_t count = mi_atomic_read_relaxed(&regions_count);
 for (size_t i = 0; i < count; i++) {
- uint8_t* start = (uint8_t*)mi_atomic_read_ptr(&regions[i].start);
+ uint8_t* start = (uint8_t*)mi_atomic_read_ptr_relaxed(&regions[i].start);
 if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
 }
 return false;
@@ -127,11 +127,11 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 {
 size_t mask = mi_region_block_mask(blocks,bitidx);
 mi_assert_internal(mask != 0);
- mi_assert_internal((mask & mi_atomic_read(&region->map)) == mask);
+ mi_assert_internal((mask & mi_atomic_read_relaxed(&region->map)) == mask);
 mi_assert_internal(&regions[idx] == region);

 // ensure the region is reserved
- void* start = mi_atomic_read_ptr(&region->start);
+ void* start = mi_atomic_read_ptr_relaxed(&region->start);
 if (start == NULL) {
 start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, mi_option_is_enabled(mi_option_eager_region_commit), tld);
@@ -139,13 +139,13 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 // failure to allocate from the OS! unclaim the blocks and fail
 size_t map;
 do {
- map = mi_atomic_read(&region->map);
- } while (!mi_atomic_compare_exchange(&region->map, map & ~mask, map));
+ map = mi_atomic_read_relaxed(&region->map);
+ } while (!mi_atomic_cas_weak(&region->map, map & ~mask, map));
 return false;
 }

 // set the newly allocated region
- if (mi_atomic_compare_exchange_ptr(&region->start, start, NULL)) {
+ if (mi_atomic_cas_ptr_strong(&region->start, start, NULL)) {
 // update the region count
 mi_atomic_increment(&regions_count);
 }
@@ -154,9 +154,9 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 // we assign it to a later slot instead (up to 4 tries).
 // note: we don't need to increment the region count, this will happen on another allocation
 for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
- void* s = mi_atomic_read_ptr(&regions[idx+i].start);
+ void* s = mi_atomic_read_ptr_relaxed(&regions[idx+i].start);
 if (s == NULL) { // quick test
- if (mi_atomic_compare_exchange_ptr(&regions[idx+i].start, start, s)) {
+ if (mi_atomic_cas_ptr_weak(&regions[idx+i].start, start, s)) {
 start = NULL;
 break;
 }
@@ -167,10 +167,10 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 _mi_os_free(start, MI_REGION_SIZE, tld->stats);
 }
 // and continue with the memory at our index
- start = mi_atomic_read_ptr(&region->start);
+ start = mi_atomic_read_ptr_relaxed(&region->start);
 }
 }
- mi_assert_internal(start == mi_atomic_read_ptr(&region->start));
+ mi_assert_internal(start == mi_atomic_read_ptr_relaxed(&region->start));
 mi_assert_internal(start != NULL);

 // Commit the blocks to memory
@@ -230,7 +230,7 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
 const uintptr_t mask = mi_region_block_mask(blocks, 0);
 const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;

- uintptr_t map = mi_atomic_read(&region->map);
+ uintptr_t map = mi_atomic_read_relaxed(&region->map);
 #ifdef MI_HAVE_BITSCAN
 size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible
@@ -245,9 +245,9 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
 mi_assert_internal((m >> bitidx) == mask); // no overflow?
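 // (illustrative aside, assuming mi_region_block_mask(blocks, bitidx) produces
 //  `blocks` consecutive one-bits starting at bit `bitidx`, i.e.
 //      mask = (((uintptr_t)1 << blocks) - 1) << bitidx
 //  so blocks=3, bitidx=5 gives 0b111 << 5 == 0xE0; the CAS on `map | m`
 //  below then claims three 4MiB blocks of the region in one step)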
 uintptr_t newmap = map | m;
 mi_assert_internal((newmap^map) >> bitidx == mask);
- if (!mi_atomic_compare_exchange(&region->map, newmap, map)) {
+ if (!mi_atomic_cas_strong(&region->map, newmap, map)) {
 // no success, another thread claimed concurrently.. keep going
- map = mi_atomic_read(&region->map);
+ map = mi_atomic_read_relaxed(&region->map);
 continue;
 }
 else {
@@ -281,7 +281,7 @@ static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, b
 // check if there are available blocks in the region..
 mi_assert_internal(idx < MI_REGION_MAX);
 mem_region_t* region = &regions[idx];
- uintptr_t m = mi_atomic_read(&region->map);
+ uintptr_t m = mi_atomic_read_relaxed(&region->map);
 if (m != MI_REGION_MAP_FULL) { // some bits are zero
 return mi_region_alloc_blocks(region, idx, blocks, size, commit, p, id, tld);
 }
@@ -317,7 +317,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t*
 // find a range of free blocks
 void* p = NULL;
- size_t count = mi_atomic_read(&regions_count);
+ size_t count = mi_atomic_read_relaxed(&regions_count);
 size_t idx = tld->region_idx; // start index is per-thread to reduce contention
 for (size_t visited = 0; visited < count; visited++, idx++) {
 if (idx >= count) idx = 0; // wrap around
@@ -376,8 +376,8 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
 size_t mask = mi_region_block_mask(blocks, bitidx);
 mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
 mem_region_t* region = &regions[idx];
- mi_assert_internal((mi_atomic_read(&region->map) & mask) == mask ); // claimed?
- void* start = mi_atomic_read_ptr(&region->start);
+ mi_assert_internal((mi_atomic_read_relaxed(&region->map) & mask) == mask ); // claimed?
+ void* start = mi_atomic_read_ptr_relaxed(&region->start);
 mi_assert_internal(start != NULL);
 void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
 mi_assert_internal(blocks_start == p); // not a pointer in our area?
@@ -405,9 +405,9 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
 uintptr_t map;
 uintptr_t newmap;
 do {
- map = mi_atomic_read(&region->map);
+ map = mi_atomic_read_relaxed(&region->map);
 newmap = map & ~mask;
- } while (!mi_atomic_compare_exchange(&region->map, newmap, map));
+ } while (!mi_atomic_cas_weak(&region->map, newmap, map));
 }
 }

@@ -419,17 +419,17 @@ void _mi_mem_collect(mi_stats_t* stats) {
 // free every region that has no segments in use.
 for (size_t i = 0; i < regions_count; i++) {
 mem_region_t* region = &regions[i];
- if (mi_atomic_read(&region->map) == 0 && region->start != NULL) {
+ if (mi_atomic_read_relaxed(&region->map) == 0 && region->start != NULL) {
 // if no segments used, try to claim the whole region
 uintptr_t m;
 do {
- m = mi_atomic_read(&region->map);
- } while(m == 0 && !mi_atomic_compare_exchange(&region->map, ~((uintptr_t)0), 0 ));
+ m = mi_atomic_read_relaxed(&region->map);
+ } while(m == 0 && !mi_atomic_cas_weak(&region->map, ~((uintptr_t)0), 0 ));
 if (m == 0) {
 // on success, free the whole region
 if (region->start != NULL) _mi_os_free((void*)region->start, MI_REGION_SIZE, stats);
 // and release
- region->start = 0;
+ mi_atomic_write_ptr(&region->start,NULL);
 mi_atomic_write(&region->map,0);
 }
 }
diff --git a/src/options.c b/src/options.c
index b30ff1c6..88f2503e 100644
--- a/src/options.c
+++ b/src/options.c
@@ -127,7 +127,7 @@ void mi_option_disable(mi_option_t option) {
 // Messages
 // --------------------------------------------------------
 #define MAX_ERROR_COUNT (10)
-static uintptr_t error_count = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings
+static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings

 // When overriding malloc, we may recurse into mi_vfprintf if an allocation
 // inside the C runtime causes another message.
diff --git a/src/os.c b/src/os.c
index e7ed57b5..fc9c5acc 100644
--- a/src/os.c
+++ b/src/os.c
@@ -186,11 +186,11 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
 #if (MI_INTPTR_SIZE >= 8)
 // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations
- static volatile intptr_t aligned_base = ((intptr_t)4 << 40); // starting at 4TiB
+ static volatile _Atomic(intptr_t) aligned_base = ATOMIC_VAR_INIT((intptr_t)4 << 40); // starting at 4TiB
 if (addr == NULL && try_alignment > 0 && try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE) == 0) {
- intptr_t hint = mi_atomic_add(&aligned_base, size) - size;
+ intptr_t hint = mi_atomic_add(&aligned_base, size);
 if (hint%try_alignment == 0) {
 return VirtualAlloc((void*)hint, size, flags, PAGE_READWRITE);
 }
@@ -214,11 +214,11 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
 static volatile uintptr_t large_page_try_ok = 0;
 void* p = NULL;
 if (use_large_os_page(size, try_alignment)) {
- uintptr_t try_ok = mi_atomic_read(&large_page_try_ok);
+ uintptr_t try_ok = mi_atomic_read_relaxed(&large_page_try_ok);
 if (try_ok > 0) {
 // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive.
 // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times.
- mi_atomic_compare_exchange(&large_page_try_ok, try_ok - 1, try_ok);
+ mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok);
 }
 else {
 // large OS pages must always reserve and commit.
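The large_page_try_ok counter above is a simple back-off; sketched here in isolation (the reset value of 10 after a failed allocation is an assumption, not shown in this hunk):

static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0

static bool should_try_large_pages(void) {
  uintptr_t try_ok = mi_atomic_read_relaxed(&large_page_try_ok);
  if (try_ok > 0) {
    // a recent failure: skip large pages and decrement the counter;
    // losing this CAS race is harmless since the count is only a heuristic
    mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok);
    return false;
  }
  return true; // after a failed attempt the caller would back off,
               // e.g. mi_atomic_write(&large_page_try_ok, 10);
}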
@@ -253,9 +253,9 @@ static void* mi_unix_mmapx(size_t size, size_t try_alignment, int protect_flags, void* p = NULL; #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations - static volatile intptr_t aligned_base = ((intptr_t)1 << 42); // starting at 4TiB + static volatile _Atomic(intptr_t) aligned_base = ATOMIC_VAR_INIT((intptr_t)1 << 42); // starting at 4TiB if (try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE)==0) { - intptr_t hint = mi_atomic_add(&aligned_base,size) - size; + intptr_t hint = mi_atomic_add(&aligned_base,size); if (hint%try_alignment == 0) { p = mmap((void*)hint,size,protect_flags,flags,fd,0); if (p==MAP_FAILED) p = NULL; // fall back to regular mmap @@ -291,14 +291,14 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) fd = VM_MAKE_TAG(100); #endif if (use_large_os_page(size, try_alignment)) { - static volatile uintptr_t large_page_try_ok = 0; - uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); + static volatile _Atomic(uintptr_t) large_page_try_ok = 0; + uintptr_t try_ok = mi_atomic_read_relaxed(&large_page_try_ok); if (try_ok > 0) { // If the OS is not configured for large OS pages, or the user does not have // enough permission, the `mmap` will always fail (but it might also fail for other reasons). // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times // to avoid too many failing calls to mmap. - mi_atomic_compare_exchange(&large_page_try_ok, try_ok - 1, try_ok); + mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); } else { int lflags = flags; diff --git a/src/page.c b/src/page.c index 54897af5..a95f5b51 100644 --- a/src/page.c +++ b/src/page.c @@ -49,11 +49,12 @@ static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { return count; } +/* // Start of the page available memory static inline uint8_t* mi_page_area(const mi_page_t* page) { return _mi_page_start(_mi_page_segment(page), page, NULL); } - +*/ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { size_t psize; @@ -126,7 +127,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) { } } while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal - !mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); + !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); } @@ -147,7 +148,7 @@ static void mi_page_thread_free_collect(mi_page_t* page) tfree = page->thread_free; head = mi_tf_block(tfree); tfreex = mi_tf_set_block(tfree,NULL); - } while (!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); + } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); // return if the list is empty if (head == NULL) return; @@ -166,7 +167,7 @@ static void mi_page_thread_free_collect(mi_page_t* page) page->free = head; // update counts now - mi_atomic_subtract(&page->thread_freed, count); + mi_atomic_subu(&page->thread_freed, count); page->used -= count; } @@ -257,7 +258,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { mi_block_t* block; do { block = (mi_block_t*)heap->thread_delayed_free; - } while (block != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&heap->thread_delayed_free, NULL, block)); + } while (block != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), NULL, block)); // and free them 
all while(block != NULL) { @@ -270,7 +271,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { do { dfree = (mi_block_t*)heap->thread_delayed_free; mi_block_set_nextx(heap->cookie, block, dfree); - } while (!mi_atomic_compare_exchange_ptr((volatile void**)&heap->thread_delayed_free, block, dfree)); + } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } block = next; diff --git a/src/segment.c b/src/segment.c index 18c06fbc..9a744ea6 100644 --- a/src/segment.c +++ b/src/segment.c @@ -542,8 +542,8 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) // live blocks (reached through other threads). Such segments // are "abandoned" and will be reclaimed by other threads to // reuse their pages and/or free them eventually -static volatile mi_segment_t* abandoned = NULL; -static volatile uintptr_t abandoned_count = 0; +static volatile _Atomic(mi_segment_t*) abandoned; // = NULL; +static volatile _Atomic(uintptr_t) abandoned_count; // = 0; static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); @@ -561,9 +561,9 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { segment->thread_id = 0; mi_segment_t* next; do { - next = (mi_segment_t*)abandoned; - mi_atomic_write_ptr((volatile void**)&segment->abandoned_next, next); - } while (!mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, segment, next)); + next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&abandoned)); + mi_atomic_write_ptr(mi_atomic_cast(void*,&segment->abandoned_next), next); + } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), segment, next)); mi_atomic_increment(&abandoned_count); } @@ -597,7 +597,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_t* segment; do { segment = (mi_segment_t*)abandoned; - } while(segment != NULL && !mi_atomic_compare_exchange_ptr((volatile void**)&abandoned, (mi_segment_t*)segment->abandoned_next, segment)); + } while(segment != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), (mi_segment_t*)segment->abandoned_next, segment)); if (segment==NULL) break; // stop early if no more segments available // got it. diff --git a/src/stats.c b/src/stats.c index 39015f94..2176ba17 100644 --- a/src/stats.c +++ b/src/stats.c @@ -38,13 +38,13 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { if (mi_is_in_main(stat)) { // add atomically (for abandoned pages) - int64_t current = mi_atomic_add(&stat->current,amount); + int64_t current = mi_atomic_add64(&stat->current,amount); if (current > stat->peak) stat->peak = stat->current; // racing.. 
it's ok if (amount > 0) { - mi_atomic_add(&stat->allocated,amount); + mi_atomic_add64(&stat->allocated,amount); } else { - mi_atomic_add(&stat->freed, -amount); + mi_atomic_add64(&stat->freed, -amount); } } else { @@ -62,8 +62,8 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { if (mi_is_in_main(stat)) { - mi_atomic_add( &stat->count, 1 ); - mi_atomic_add( &stat->total, (int64_t)amount ); + mi_atomic_add64( &stat->count, 1 ); + mi_atomic_add64( &stat->total, (int64_t)amount ); } else { stat->count++; @@ -82,16 +82,16 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { // must be thread safe as it is called from stats_merge static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { if (stat==src) return; - mi_atomic_add( &stat->allocated, src->allocated * unit); - mi_atomic_add( &stat->current, src->current * unit); - mi_atomic_add( &stat->freed, src->freed * unit); - mi_atomic_add( &stat->peak, src->peak * unit); + mi_atomic_add64( &stat->allocated, src->allocated * unit); + mi_atomic_add64( &stat->current, src->current * unit); + mi_atomic_add64( &stat->freed, src->freed * unit); + mi_atomic_add64( &stat->peak, src->peak * unit); } static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) { if (stat==src) return; - mi_atomic_add( &stat->total, src->total * unit); - mi_atomic_add( &stat->count, src->count * unit); + mi_atomic_add64( &stat->total, src->total * unit); + mi_atomic_add64( &stat->count, src->count * unit); } // must be thread safe as it is called from stats_merge From baabc775034efeb55a93c8088492933e56d8334f Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 25 Aug 2019 23:02:41 -0700 Subject: [PATCH 20/40] use proper atomic initialization macros --- src/init.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/init.c b/src/init.c index 76e586f2..290caeec 100644 --- a/src/init.c +++ b/src/init.c @@ -19,7 +19,8 @@ const mi_page_t _mi_page_empty = { 0, #endif 0, // used - NULL, 0, 0, + NULL, + ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), 0, NULL, NULL, NULL #if (MI_INTPTR_SIZE==8 && MI_SECURE>0) || (MI_INTPTR_SIZE==4 && MI_SECURE==0) , { NULL } // padding @@ -81,7 +82,7 @@ const mi_heap_t _mi_heap_empty = { NULL, MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, - NULL, + ATOMIC_VAR_INIT(NULL), 0, 0, 0, From 2159c224151e5be1f3bcf73acefe62eef17d080f Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 25 Aug 2019 23:06:18 -0700 Subject: [PATCH 21/40] fix atomic declaration on windows --- src/os.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/os.c b/src/os.c index fc9c5acc..fb36f3fc 100644 --- a/src/os.c +++ b/src/os.c @@ -211,7 +211,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment } static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags) { - static volatile uintptr_t large_page_try_ok = 0; + static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; if (use_large_os_page(size, try_alignment)) { uintptr_t try_ok = mi_atomic_read_relaxed(&large_page_try_ok); @@ -291,7 +291,7 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) fd = VM_MAKE_TAG(100); #endif if (use_large_os_page(size, try_alignment)) { - static volatile _Atomic(uintptr_t) large_page_try_ok = 0; + static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; uintptr_t try_ok = 
mi_atomic_read_relaxed(&large_page_try_ok); if (try_ok > 0) { // If the OS is not configured for large OS pages, or the user does not have From 5c7c106d62f70db566e337abd6575021ec55f1bf Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 26 Aug 2019 08:11:15 -0700 Subject: [PATCH 22/40] strengthen some atomic operations for weak memory models --- include/mimalloc-atomic.h | 45 +++++++++++++++++++++++++++------------ src/alloc.c | 18 +++++++++------- src/memory.c | 20 ++++++++--------- src/stats.c | 4 ++-- 4 files changed, 53 insertions(+), 34 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 739d0512..3a289feb 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -30,26 +30,32 @@ terms of the MIT license. A copy of the license can be found in the file // ------------------------------------------------------ // Atomically add a 64-bit value; returns the previous value. -// Note: not using _Atomic(int64_t) as it is only used for stats. -static inline int64_t mi_atomic_add64(volatile int64_t* p, int64_t add); +// Note: not using _Atomic(int64_t) as it is only used for statistics. +static inline void mi_atomic_add64(volatile int64_t* p, int64_t add); -// Atomically add a value; returns the previous value. +// Atomically add a value; returns the previous value. Memory ordering is relaxed. static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add); -// Atomically compare and exchange a value; returns `true` if successful. May fail spuriously. +// Atomically compare and exchange a value; returns `true` if successful. +// May fail spuriously. Memory ordering as release on success, and relaxed on failure. // (Note: expected and desired are in opposite order from atomic_compare_exchange) static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); // Atomically compare and exchange a value; returns `true` if successful. +// Memory ordering is acquire-release +// (Note: expected and desired are in opposite order from atomic_compare_exchange) static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); -// Atomically exchange a value. +// Atomically exchange a value. Memory ordering is acquire-release. static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange); -// Atomically read a value +// Atomically read a value. Memory ordering is relaxed. static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p); -// Atomically write a value +// Atomically read a value. Memory ordering is acquire. +static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p); + +// Atomically write a value. Memory ordering is release. static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x); // Yield @@ -76,11 +82,16 @@ static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) { return mi_atomic_subu(p, 1); } -// Atomically read a pointer +// Atomically read a pointer; Memory order is relaxed. static inline void* mi_atomic_read_ptr_relaxed(volatile _Atomic(void*) const * p) { return (void*)mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)p); } +// Atomically read a pointer; Memory order is acquire. 
+static inline void* mi_atomic_read_ptr(volatile _Atomic(void*) const * p) { + return (void*)mi_atomic_read((const volatile _Atomic(uintptr_t)*)p); +} + // Atomically write a pointer static inline void mi_atomic_write_ptr(volatile _Atomic(void*)* p, void* x) { mi_atomic_write((volatile _Atomic(uintptr_t)*)p, (uintptr_t)x ); @@ -127,18 +138,21 @@ static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } -static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) { +static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) { return *p; } +static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) { + return mi_atomic_read(p); +} static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { mi_atomic_exchange(p,x); } static inline void mi_atomic_yield(void) { YieldProcessor(); } -static inline int64_t mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) { +static inline void mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) { #ifdef _WIN64 - return mi_atomic_add(p,add); + mi_atomic_add(p,add); #else int64_t current; int64_t sum; @@ -146,7 +160,6 @@ static inline int64_t mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) current = *p; sum = current + add; } while (_InterlockedCompareExchange64(p, sum, current) != current); - return current; #endif } @@ -156,9 +169,9 @@ static inline int64_t mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) #else #define MI_USING_STD #endif -static inline int64_t mi_atomic_add64(volatile int64_t* p, int64_t add) { +static inline void mi_atomic_add64(volatile int64_t* p, int64_t add) { MI_USING_STD - return atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed); + atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed); } static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { MI_USING_STD @@ -180,6 +193,10 @@ static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t) MI_USING_STD return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_relaxed); } +static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p) { + MI_USING_STD + return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_acquire); +} static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { MI_USING_STD return atomic_store_explicit(p, x, memory_order_release); diff --git a/src/alloc.c b/src/alloc.c index 97c5fcc4..7e89a591 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -118,22 +118,24 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_segment_t* segment = _mi_page_segment(page); if (segment->page_kind==MI_PAGE_HUGE) { // huge page segments are always abandoned and can be freed immediately - mi_assert_internal(segment->thread_id==0); - mi_assert_internal(segment->abandoned_next==NULL); + mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); + mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&segment->abandoned_next))==NULL); // claim it and free - mi_block_set_next(page, block, page->free); - page->free = block; - page->used--; mi_heap_t* heap = mi_get_default_heap(); - segment->thread_id = heap->thread_id; - 
_mi_segment_page_free(page,true,&heap->tld->segments);
+ // paranoia: if this is the last reference, the cas should always succeed
+ if (mi_atomic_cas_strong(&segment->thread_id,heap->thread_id,0)) {
+ mi_block_set_next(page, block, page->free);
+ page->free = block;
+ page->used--;
+ _mi_segment_page_free(page,true,&heap->tld->segments);
+ }
 return;
 }

 do {
 tfree = page->thread_free;
 use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE ||
- (mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == page->thread_freed+1)
+ (mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == mi_atomic_read_relaxed(&page->thread_freed)+1) // data-race but ok, just optimizes early release of the page
 );
 if (mi_unlikely(use_delayed)) {
 // unlikely: this only happens on the first concurrent free in a page that is in the full list
diff --git a/src/memory.c b/src/memory.c
index 1ea6ee16..268dc153 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -131,7 +131,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 mi_assert_internal(&regions[idx] == region);

 // ensure the region is reserved
- void* start = mi_atomic_read_ptr_relaxed(&region->start);
+ void* start = mi_atomic_read_ptr(&region->start);
 if (start == NULL) {
 start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, mi_option_is_enabled(mi_option_eager_region_commit), tld);
@@ -154,9 +154,9 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 // we assign it to a later slot instead (up to 4 tries).
 // note: we don't need to increment the region count, this will happen on another allocation
 for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
- void* s = mi_atomic_read_ptr_relaxed(&regions[idx+i].start);
+ void* s = mi_atomic_read_ptr(&regions[idx+i].start);
 if (s == NULL) { // quick test
- if (mi_atomic_cas_ptr_weak(&regions[idx+i].start, start, s)) {
+ if (mi_atomic_cas_ptr_strong(&regions[idx+i].start, start, NULL)) {
 start = NULL;
 break;
 }
@@ -167,10 +167,10 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 _mi_os_free(start, MI_REGION_SIZE, tld->stats);
 }
 // and continue with the memory at our index
- start = mi_atomic_read_ptr_relaxed(&region->start);
+ start = mi_atomic_read_ptr(&region->start);
 }
 }
- mi_assert_internal(start == mi_atomic_read_ptr_relaxed(&region->start));
+ mi_assert_internal(start == mi_atomic_read_ptr(&region->start));
 mi_assert_internal(start != NULL);

 // Commit the blocks to memory
@@ -230,7 +230,7 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
 const uintptr_t mask = mi_region_block_mask(blocks, 0);
 const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;

- uintptr_t map = mi_atomic_read_relaxed(&region->map);
+ uintptr_t map = mi_atomic_read(&region->map);
 #ifdef MI_HAVE_BITSCAN
 size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible
@@ -245,9 +245,9 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
 mi_assert_internal((m >> bitidx) == mask); // no overflow?
 uintptr_t newmap = map | m;
 mi_assert_internal((newmap^map) >> bitidx == mask);
- if (!mi_atomic_cas_strong(&region->map, newmap, map)) {
+ if (!mi_atomic_cas_weak(&region->map, newmap, map)) {
 // no success, another thread claimed concurrently.. keep going
-      map = mi_atomic_read_relaxed(&region->map);
+      map = mi_atomic_read(&region->map);
       continue;
     }
     else {
@@ -317,7 +317,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t*
   // find a range of free blocks
   void* p = NULL;
-  size_t count = mi_atomic_read_relaxed(&regions_count);
+  size_t count = mi_atomic_read(&regions_count);
   size_t idx = tld->region_idx; // start index is per-thread to reduce contention
   for (size_t visited = 0; visited < count; visited++, idx++) {
     if (idx >= count) idx = 0; // wrap around
@@ -377,7 +377,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
   mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
   mem_region_t* region = &regions[idx];
   mi_assert_internal((mi_atomic_read_relaxed(&region->map) & mask) == mask ); // claimed?
-  void* start = mi_atomic_read_ptr_relaxed(&region->start);
+  void* start = mi_atomic_read_ptr(&region->start);
   mi_assert_internal(start != NULL);
   void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
   mi_assert_internal(blocks_start == p); // not a pointer in our area?
diff --git a/src/stats.c b/src/stats.c
index 2176ba17..4dddb4bc 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -38,8 +38,8 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
   if (mi_is_in_main(stat))
   {
     // add atomically (for abandoned pages)
-    int64_t current = mi_atomic_add64(&stat->current,amount);
-    if (current > stat->peak) stat->peak = stat->current; // racing.. it's ok
+    mi_atomic_add64(&stat->current,amount);
+    if (stat->current > stat->peak) stat->peak = stat->current; // racing.. it's ok
     if (amount > 0) {
       mi_atomic_add64(&stat->allocated,amount);
     }

From 7ce9c02fd40796e4392892c0d413a0ac3462d112 Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 26 Aug 2019 08:20:26 -0700
Subject: [PATCH 23/40] make cas weak use release memory order; improve free assembly

---
 include/mimalloc-atomic.h | 2 +-
 src/alloc.c               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h
index 3a289feb..8b254d3e 100644
--- a/include/mimalloc-atomic.h
+++ b/include/mimalloc-atomic.h
@@ -179,7 +179,7 @@ static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add
 }
 static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
   MI_USING_STD
-  return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_relaxed);
+  return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_release, memory_order_relaxed);
 }
 static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
   MI_USING_STD
diff --git a/src/alloc.c b/src/alloc.c
index 7e89a591..afc181dd 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -226,7 +226,7 @@ void mi_free(void* p) mi_attr_noexcept
 #endif

   const mi_segment_t* const segment = _mi_ptr_segment(p);
-  if (segment == NULL) return;  // checks for (p==NULL)
+  if (mi_unlikely(segment == NULL)) return;  // checks for (p==NULL)

 #if (MI_DEBUG>0)
   if (mi_unlikely(!mi_is_in_heap_region(p))) {

From 2c19388bcfc08fa2acb3b4e58c569b7ff4b060e7 Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 26 Aug 2019 11:44:41 -0700
Subject: [PATCH 24/40] initialize mimalloc options at process load

---
 include/mimalloc-internal.h | 1 +
 src/init.c                  | 1 +
 src/options.c               | 7 +++++++
 3 files changed, 9 insertions(+)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index d886bcec..3889c66e 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -27,6 +27,7 @@ void _mi_error_message(const char* fmt, ...);
 void _mi_warning_message(const char* fmt, ...);
 void _mi_verbose_message(const char* fmt, ...);
 void _mi_trace_message(const char* fmt, ...);
+void _mi_options_init(void);

 // "init.c"
 extern mi_stats_t _mi_stats_main;
diff --git a/src/init.c b/src/init.c
index 290caeec..4c7fdda0 100644
--- a/src/init.c
+++ b/src/init.c
@@ -416,6 +416,7 @@ static void mi_allocator_done() {
 static void mi_process_load(void) {
   os_preloading = false;
   atexit(&mi_process_done);
+  _mi_options_init();
   mi_process_init();
   //mi_stats_reset();
   if (mi_redirected) _mi_verbose_message("malloc is redirected.\n");
diff --git a/src/options.c b/src/options.c
index 88f2503e..16c50f11 100644
--- a/src/options.c
+++ b/src/options.c
@@ -73,6 +73,13 @@ static mi_option_desc_t options[_mi_option_last] =

 static void mi_option_init(mi_option_desc_t* desc);

+void _mi_options_init(void) {
+  // called on process load
+  for(int i = 0; i < _mi_option_last; i++ ) {
+    mi_option_get((mi_option_t)i); // initialize
+  }
+}
+
 long mi_option_get(mi_option_t option) {
   mi_assert(option >= 0 && option < _mi_option_last);
   mi_option_desc_t* desc = &options[option];

From 8b06ab1e4946005e4bf8c067c33c53b2647aaf39 Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 26 Aug 2019 12:41:35 -0700
Subject: [PATCH 25/40] fix check on gigabyte alignment of huge os pages on windows

---
 src/os.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/os.c b/src/os.c
index 1c7696b4..5d1b7576 100644
--- a/src/os.c
+++ b/src/os.c
@@ -198,7 +198,7 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
 #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
   // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
-  if ((size % (uintptr_t)1 << 30) == 0 /* 1GiB multiple */
+  if ((size % ((uintptr_t)1 << 30)) == 0 /* 1GiB multiple */
     && (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0
     && (addr != NULL || try_alignment == 0 || try_alignment % _mi_os_page_size() == 0)
     && pNtAllocateVirtualMemoryEx != NULL)
@@ -217,7 +217,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
     }
     else {
       // else fall back to regular large OS pages
-      _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %lx)\n", err);
+      _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error 0x%lx)\n", err);
     }
   }
 #endif

From 3d8c331a1c3994a8727528487c956fddf81e2519 Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 26 Aug 2019 12:41:59 -0700
Subject: [PATCH 26/40] search regions always from the lowest index

---
 src/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/memory.c b/src/memory.c
index 268dc153..222b87c2 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -318,7 +318,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t*
   // find a range of free blocks
   void* p = NULL;
   size_t count = mi_atomic_read(&regions_count);
-  size_t idx = tld->region_idx; // start index is per-thread to reduce contention
+  size_t idx = 0; // tld->region_idx; // start index is per-thread to reduce contention
   for (size_t visited = 0; visited < count; visited++, idx++) {
     if (idx >= count) idx = 0; // wrap around
     if (!mi_region_try_alloc_blocks(idx, blocks, size, commit,
&p, id, tld)) return NULL; // error From f0a12699c208191afad6373a64a71c76af7bdb05 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 26 Aug 2019 12:42:25 -0700 Subject: [PATCH 27/40] remove atomic_iread --- include/mimalloc-atomic.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 9549cbc3..8b254d3e 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -114,9 +114,6 @@ static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exc return (void*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)p, (uintptr_t)exchange); } -static inline intptr_t mi_atomic_iread(volatile intptr_t* p) { - return (intptr_t)mi_atomic_read( (volatile uintptr_t*)p ); -} #ifdef _MSC_VER #define WIN32_LEAN_AND_MEAN From eea093000a30b2e069b77f803217622e3901b0b9 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 26 Aug 2019 13:47:52 -0700 Subject: [PATCH 28/40] graceful fallback for huge page allocation on Linux --- src/os.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/os.c b/src/os.c index 5d1b7576..7648c010 100644 --- a/src/os.c +++ b/src/os.c @@ -369,6 +369,13 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro if (large_only || lflags != flags) { // try large OS page allocation p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); + #ifdef MAP_HUGE_1GB + if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { + _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); + lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); + p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); + } + #endif if (large_only) return p; if (p == NULL) { mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations From db8d443ae661870af5c0815b7cdb0e3bdcb0f13b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 26 Aug 2019 22:45:26 -0700 Subject: [PATCH 29/40] track more precisely if memory is fixed or committed --- include/mimalloc-internal.h | 4 +- include/mimalloc-types.h | 11 +++- src/memory.c | 128 +++++++++++++++++++++--------------- src/options.c | 2 +- src/os.c | 98 +++++++++++++++++---------- src/segment.c | 55 ++++++++-------- 6 files changed, 176 insertions(+), 122 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 447df7df..9b3a3907 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -45,8 +45,8 @@ void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocat void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data // memory.c -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t* id, mi_os_tld_t* tld); -void* _mi_mem_alloc(size_t size, bool commit, size_t* id, mi_os_tld_t* tld); +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, size_t* id, mi_os_tld_t* tld); +void* _mi_mem_alloc(size_t size, bool commit, bool* large, size_t* id, mi_os_tld_t* tld); void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 0b2334b8..4bf51d1d 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -167,7 +167,7 @@ typedef struct mi_page_s { bool is_committed:1; // `true` if the page virtual memory is committed // layout like this to optimize access in `mi_malloc` and 
`mi_free` - uint16_t capacity; // number of blocks committed + uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (16 bits) @@ -207,7 +207,13 @@ typedef enum mi_page_kind_e { // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. typedef struct mi_segment_s { - struct mi_segment_s* next; + // memory fields + size_t memid; // id for the os-level memory manager + bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) + bool mem_is_committed; // `true` if the whole segment is eagerly committed + + // segment fields + struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` struct mi_segment_s* prev; volatile _Atomic(struct mi_segment_s*) abandoned_next; size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) @@ -216,7 +222,6 @@ typedef struct mi_segment_s { size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` - size_t memid; // id for the os-level memory manager // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). diff --git a/src/memory.c b/src/memory.c index 222b87c2..a9b87b8e 100644 --- a/src/memory.c +++ b/src/memory.c @@ -39,14 +39,14 @@ Possible issues: // Internal raw OS interface size_t _mi_os_large_page_size(); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, mi_stats_t* stats); -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld_t* tld); - +bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_commit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_unreset(void* p, size_t size, mi_stats_t* stats); +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); +bool _mi_os_is_huge_reserved(void* p); // Constants #if (MI_INTPTR_SIZE==8) @@ -66,11 +66,24 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld #define MI_REGION_MAP_FULL UINTPTR_MAX +typedef uintptr_t mi_region_info_t; + +static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) { + return ((uintptr_t)start | ((is_large?1:0) << 1) | (is_committed?1:0)); +} + +static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) { + if (is_large) *is_large = ((info&0x02) != 0); + if (is_committed) *is_committed = ((info&0x01) != 0); + return (void*)(info & ~0x03); +} + + // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per 
MI_SEGMENT_SIZE (4MiB) block.
 typedef struct mem_region_s {
-  volatile _Atomic(uintptr_t) map;   // in-use bit per MI_SEGMENT_SIZE block
-  volatile _Atomic(void*) start;     // start of virtual memory area
+  volatile _Atomic(uintptr_t) map;          // in-use bit per MI_SEGMENT_SIZE block
+  volatile _Atomic(mi_region_info_t) info;  // start of virtual memory area, and flags
 } mem_region_t;
@@ -108,7 +121,7 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
   if (p==NULL) return false;
   size_t count = mi_atomic_read_relaxed(&regions_count);
   for (size_t i = 0; i < count; i++) {
-    uint8_t* start = (uint8_t*)mi_atomic_read_ptr_relaxed(&regions[i].start);
+    uint8_t* start = (uint8_t*)mi_region_info_read( mi_atomic_read_relaxed(&regions[i].info), NULL, NULL);
     if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
   }
   return false;
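
Aside: the `mi_region_info_t` encoding above works because `start` is MI_SEGMENT_ALIGN-aligned, so its two low bits are always zero and can carry the `is_large` and `is_committed` flags. A minimal standalone sketch of the same pointer-tagging technique (the names here are illustrative, not part of mimalloc):

  #include <assert.h>
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  // pack an aligned pointer together with two flag bits in its (zero) low bits
  static uintptr_t info_pack(void* start, bool is_large, bool is_committed) {
    assert(((uintptr_t)start & 0x03) == 0);   // requires at least 4-byte alignment
    return (uintptr_t)start | ((is_large ? 1 : 0) << 1) | (is_committed ? 1 : 0);
  }

  static void* info_unpack(uintptr_t info, bool* is_large, bool* is_committed) {
    if (is_large)     *is_large     = ((info & 0x02) != 0);
    if (is_committed) *is_committed = ((info & 0x01) != 0);
    return (void*)(info & ~(uintptr_t)0x03);  // mask the flag bits off again
  }

  int main(void) {
    static _Alignas(64) char region[64];
    uintptr_t info = info_pack(region, true, false);
    bool large, committed;
    void* start = info_unpack(info, &large, &committed);
    printf("start ok: %d, large: %d, committed: %d\n", start == (void*)region, large, committed);
    return 0;
  }

Packing the flags into one word is what lets a single atomic compare-and-swap publish the region pointer and its properties together.
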
@@ -123,7 +136,7 @@ Commit from a region
 // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
 // if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
 // (not being able to claim is not considered an error so check for `p != NULL` afterwards).
-static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld)
+static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, bool commit, bool* large, void** p, size_t* id, mi_os_tld_t* tld)
 {
   size_t mask = mi_region_block_mask(blocks,bitidx);
   mi_assert_internal(mask != 0);
@@ -131,10 +144,14 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
   mi_assert_internal(&regions[idx] == region);

   // ensure the region is reserved
-  void* start = mi_atomic_read_ptr(&region->start);
-  if (start == NULL)
+  mi_region_info_t info = mi_atomic_read(&region->info);
+  if (info == 0)
   {
-    start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, mi_option_is_enabled(mi_option_eager_region_commit), tld);
+    bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit);
+    bool region_large = region_commit && *large;
+    void* start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, &region_large, tld);
+    *large = region_large;
+
     if (start == NULL) {
       // failure to allocate from the OS! unclaim the blocks and fail
       size_t map;
@@ -145,7 +162,8 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
     }

     // set the newly allocated region
-    if (mi_atomic_cas_ptr_strong(&region->start, start, NULL)) {
+    info = mi_region_info_create(start,region_large,region_commit);
+    if (mi_atomic_cas_strong(&region->info, info, 0)) {
       // update the region count
       mi_atomic_increment(&regions_count);
     }
@@ -154,12 +172,9 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
       // we assign it to a later slot instead (up to 4 tries).
       // note: we don't need to increment the region count, this will happen on another allocation
       for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
-        void* s = mi_atomic_read_ptr(&regions[idx+i].start);
-        if (s == NULL) { // quick test
-          if (mi_atomic_cas_ptr_strong(&regions[idx+i].start, start, NULL)) {
-            start = NULL;
-            break;
-          }
+        if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
+          start = NULL;
+          break;
         }
       }
       if (start != NULL) {
@@ -167,15 +182,17 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
         _mi_os_free(start, MI_REGION_SIZE, tld->stats);
       }
       // and continue with the memory at our index
-      start = mi_atomic_read_ptr(&region->start);
+      info = mi_atomic_read(&region->info);
     }
   }
-  mi_assert_internal(start == mi_atomic_read_ptr(&region->start));
-  mi_assert_internal(start != NULL);
+  mi_assert_internal(info == mi_atomic_read(&region->info));
+  mi_assert_internal(info != 0);

   // Commit the blocks to memory
+  bool region_is_committed = false;
+  void* start = mi_region_info_read(info,large,&region_is_committed);
   void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
-  if (commit && !mi_option_is_enabled(mi_option_eager_region_commit)) {
+  if (commit && !region_is_committed) {
     _mi_os_commit(blocks_start, mi_good_commit_size(size), tld->stats);  // only commit needed size (unless using large OS pages)
   }
@@ -223,7 +240,7 @@ static inline size_t mi_bsr(uintptr_t x) {
 // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
 // if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
 // (not being able to claim is not considered an error so check for `p != NULL` afterwards).
-static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld)
+static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool commit, bool* large, void** p, size_t* id, mi_os_tld_t* tld)
 {
   mi_assert_internal(p != NULL && id != NULL);
   mi_assert_internal(blocks < MI_REGION_MAP_BITS);
@@ -253,7 +270,7 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
     else {
       // success, we claimed the bits
       // now commit the block memory -- this can still fail
-      return mi_region_commit_blocks(region, idx, bitidx, blocks, size, commit, p, id, tld);
+      return mi_region_commit_blocks(region, idx, bitidx, blocks, size, commit, large, p, id, tld);
     }
   }
   else {
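
Aside: the claim step in `mi_region_alloc_blocks` is a classic lock-free bitmap reservation: build a mask of `blocks` one-bits, find a position where it fits, and publish it with a compare-and-swap, rescanning whenever another thread wins the race. A simplified single-word sketch in C11 atomics (illustrative only; mimalloc's version additionally uses bit-scan intrinsics to skip occupied runs quickly):

  #include <stdatomic.h>
  #include <stdint.h>

  // try to claim `blocks` contiguous bits in `*map`;
  // returns the starting bit index, or -1 if no free run was found
  static int claim_blocks(_Atomic(uintptr_t)* map, int blocks) {
    const int nbits = (int)(8 * sizeof(uintptr_t));
    const uintptr_t mask = (blocks >= nbits) ? ~(uintptr_t)0
                                             : (((uintptr_t)1 << blocks) - 1);
    uintptr_t m = atomic_load_explicit(map, memory_order_relaxed);
    for (;;) {
      int bitidx = -1;
      for (int i = 0; i + blocks <= nbits; i++) {
        if ((m & (mask << i)) == 0) { bitidx = i; break; }  // free run found
      }
      if (bitidx < 0) return -1;  // no room in this word
      // publish the claim; on failure another thread raced us and `m` is reloaded
      if (atomic_compare_exchange_weak_explicit(map, &m, m | (mask << bitidx),
                                                memory_order_acq_rel,
                                                memory_order_relaxed)) {
        return bitidx;
      }
    }
  }

Freeing blocks is the mirror image: atomically AND the word with the complement of the shifted mask.
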
@@ -276,14 +293,14 @@ static inline size_t mi_bsr(uintptr_t x) {
 // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
 // if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call.
 // (not being able to claim is not considered an error so check for `p != NULL` afterwards).
-static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld)
+static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, bool commit, bool* large, void** p, size_t* id, mi_os_tld_t* tld)
 {
   // check if there are available blocks in the region..
   mi_assert_internal(idx < MI_REGION_MAX);
   mem_region_t* region = &regions[idx];
   uintptr_t m = mi_atomic_read_relaxed(&region->map);
   if (m != MI_REGION_MAP_FULL) {  // some bits are zero
-    return mi_region_alloc_blocks(region, idx, blocks, size, commit, p, id, tld);
+    return mi_region_alloc_blocks(region, idx, blocks, size, commit, large, p, id, tld);
   }
   else {
     return true;  // no error, but no success either
   }
 }

/* ----------------------------------------------------------------------------
 Allocation
-----------------------------------------------------------------------------*/

 // Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
 // (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
-void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t* id, mi_os_tld_t* tld)
+void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, size_t* id, mi_os_tld_t* tld)
 {
   mi_assert_internal(id != NULL && tld != NULL);
   mi_assert_internal(size > 0);
   *id = SIZE_MAX;
+  bool default_large = false;
+  if (large==NULL) large = &default_large;  // ensure `large != NULL`

   // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`)
   if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) {
-    return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, true, tld);  // round up size
+    return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, commit, large, tld);  // round up size
   }

   // always round size to OS page size multiple (so commit/decommit go over the entire range)
@@ -318,27 +337,27 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t*
   // find a range of free blocks
   void* p = NULL;
   size_t count = mi_atomic_read(&regions_count);
-  size_t idx = 0; // tld->region_idx; // start index is per-thread to reduce contention
+  size_t idx = 0; // tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention?
   for (size_t visited = 0; visited < count; visited++, idx++) {
     if (idx >= count) idx = 0; // wrap around
-    if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, &p, id, tld)) return NULL; // error
+    if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, &p, id, tld)) return NULL; // error
     if (p != NULL) break;
   }

   if (p == NULL) {
     // no free range in existing regions -- try to extend beyond the count.. but at most 4 regions
     for (idx = count; idx < count + 4 && idx < MI_REGION_MAX; idx++) {
-      if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, &p, id, tld)) return NULL; // error
+      if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, &p, id, tld)) return NULL; // error
       if (p != NULL) break;
     }
   }

   if (p == NULL) {
     // we could not find a place to allocate, fall back to the os directly
-    p = _mi_os_alloc_aligned(size, alignment, commit, tld);
+    p = _mi_os_alloc_aligned(size, alignment, commit, large, tld);
   }
   else {
     tld->region_idx = idx;  // next start of search?
   }

   mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0);
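
Aside: the search loop above is a bounded wrap-around scan, and the commented-out alternative would start it at a per-thread index so threads tend to probe different regions first. A small sketch of the same loop shape, with hypothetical names:

  #include <stdbool.h>
  #include <stddef.h>

  // returns the index of the first region accepted by `try_claim`, or -1
  static int region_search(size_t count, size_t start_idx,
                           bool (*try_claim)(size_t idx)) {
    size_t idx = start_idx;
    for (size_t visited = 0; visited < count; visited++, idx++) {
      if (idx >= count) idx = 0;          // wrap around
      if (try_claim(idx)) return (int)idx;
    }
    return -1;  // all `count` regions visited without success
  }

Starting at 0 favors reusing low addresses (so high regions can eventually be freed), while starting at a per-thread index reduces CAS contention; the patch above chooses the former.
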
@@ -347,8 +366,8 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t*

 // Allocate `size` memory. Return non NULL on success, with a given memory `id`.
-void* _mi_mem_alloc(size_t size, bool commit, size_t* id, mi_os_tld_t* tld) {
-  return _mi_mem_alloc_aligned(size,0,commit,id,tld);
+void* _mi_mem_alloc(size_t size, bool commit, bool* large, size_t* id, mi_os_tld_t* tld) {
+  return _mi_mem_alloc_aligned(size,0,commit,large,id,tld);
 }

/* ----------------------------------------------------------------------------
@@ -377,7 +396,10 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
   mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
   mem_region_t* region = &regions[idx];
   mi_assert_internal((mi_atomic_read_relaxed(&region->map) & mask) == mask ); // claimed?
-  void* start = mi_atomic_read_ptr(&region->start);
+  mi_region_info_t info = mi_atomic_read(&region->info);
+  bool is_large;
+  bool is_eager_committed;
+  void* start = mi_region_info_read(info,&is_large,&is_eager_committed);
   mi_assert_internal(start != NULL);
   void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
   mi_assert_internal(blocks_start == p); // not a pointer in our area?
@@ -388,18 +410,13 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
   // TODO: implement delayed decommit/reset as these calls are too expensive
   // if the memory is reused soon.
   // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large
-  if (!mi_option_is_enabled(mi_option_large_os_pages)) {
-    if (mi_option_is_enabled(mi_option_eager_region_commit)) {
-      //_mi_os_reset(p, size, stats);
-    }
-    else {
-      //_mi_os_decommit(p, size, stats);
-    }
-  }
+  if (!is_large) {
+    // _mi_os_reset(p,size,stats);
+    // _mi_os_decommit(p,size,stats); // if !is_committed
+  }

-  // TODO: should we free empty regions? currently only done _mi_mem_collect.
-  // this frees up virtual address space which
-  // might be useful on 32-bit systems?
+  // TODO: should we free empty regions? currently only done in _mi_mem_collect.
+  // this frees up virtual address space which might be useful on 32-bit systems?

   // and unclaim
   uintptr_t map;
@@ -419,17 +436,20 @@ void _mi_mem_collect(mi_stats_t* stats) {
   // free every region that has no segments in use.
   for (size_t i = 0; i < regions_count; i++) {
     mem_region_t* region = &regions[i];
-    if (mi_atomic_read_relaxed(&region->map) == 0 && region->start != NULL) {
+    if (mi_atomic_read_relaxed(&region->map) == 0) {
       // if no segments used, try to claim the whole region
       uintptr_t m;
       do {
         m = mi_atomic_read_relaxed(&region->map);
       } while(m == 0 && !mi_atomic_cas_weak(&region->map, ~((uintptr_t)0), 0 ));
       if (m == 0) {
-        // on success, free the whole region
-        if (region->start != NULL) _mi_os_free((void*)region->start, MI_REGION_SIZE, stats);
+        // on success, free the whole region (unless it was huge reserved)
+        void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, NULL);
+        if (start != NULL && !_mi_os_is_huge_reserved(start)) {
+          _mi_os_free(start, MI_REGION_SIZE, stats);
+        }
         // and release
-        mi_atomic_write_ptr(&region->start,NULL);
+        mi_atomic_write(&region->info,0);
         mi_atomic_write(&region->map,0);
       }
     }
diff --git a/src/options.c b/src/options.c
index 11bb78d8..41bf33a0 100644
--- a/src/options.c
+++ b/src/options.c
@@ -58,7 +58,7 @@ static mi_option_desc_t options[_mi_option_last] =
 #endif

   // the following options are experimental and not all combinations make sense.
-  { 1, UNINIT, MI_OPTION(eager_commit) },        // note: if eager_region_commit is on, this should be on too.
+  { 1, UNINIT, MI_OPTION(eager_commit) },        // note: needs to be on when eager_region_commit is enabled
 #ifdef _WIN32   // and BSD?
  { 1, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...)
 #else
diff --git a/src/os.c b/src/os.c
index 7648c010..e1306722 100644
--- a/src/os.c
+++ b/src/os.c
@@ -35,10 +35,9 @@ terms of the MIT license. A copy of the license can be found in the file
   On windows initializes support for aligned allocation and
   large OS pages (if MIMALLOC_LARGE_OS_PAGES is true).
----------------------------------------------------------- */
-bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
-
-static bool mi_os_is_huge_reserved(void* p);
-static void* mi_os_alloc_from_huge_reserved(size_t size, size_t try_alignment, bool commit);
+bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
+bool _mi_os_is_huge_reserved(void* p);
+static void* mi_os_alloc_from_huge_reserved(size_t size, size_t try_alignment, bool commit);

 static void* mi_align_up_ptr(void* p, size_t alignment) {
   return (void*)_mi_align_up((uintptr_t)p, alignment);
@@ -173,7 +172,7 @@ void _mi_os_init() {

 static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
 {
-  if (addr == NULL || size == 0 || mi_os_is_huge_reserved(addr)) return true;
+  if (addr == NULL || size == 0 || _mi_os_is_huge_reserved(addr)) return true;
   bool err = false;
 #if defined(_WIN32)
   err = (VirtualFree(addr, 0, MEM_RELEASE) == 0);
@@ -199,7 +198,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
 #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
   // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
   if ((size % ((uintptr_t)1 << 30)) == 0 /* 1GiB multiple */
-    && (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0
+    && (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0
     && (addr != NULL || try_alignment == 0 || try_alignment % _mi_os_page_size() == 0)
     && pNtAllocateVirtualMemoryEx != NULL)
@@ -211,7 +210,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
     param.ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE;
     SIZE_T psize = size;
     void* base = addr;
-    NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags | MEM_RESERVE, PAGE_READWRITE, &param, 1);
+    NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, &param, 1);
     if (err == 0) {
       return base;
     }
@@ -247,10 +246,12 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
   return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
 }

-static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only) {
+static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) {
+  mi_assert_internal(!(large_only && !allow_large));
   static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0;
   void* p = NULL;
-  if (large_only || use_large_os_page(size, try_alignment)) {
+  if ((large_only || use_large_os_page(size, try_alignment))
+      && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) {
     uintptr_t try_ok = mi_atomic_read(&large_page_try_ok);
     if (!large_only && try_ok > 0) {
       // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive.
@@ -259,7 +260,8 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
     }
     else {
       // large OS pages must always reserve and commit.
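
Aside: the comment above is the key Windows constraint -- MEM_LARGE_PAGES is only valid when the range is reserved and committed in one call, and it additionally needs the SeLockMemoryPrivilege. A minimal standalone sketch of the try-large-then-fall-back pattern (assumes `size` is a multiple of GetLargePageMinimum(); this is not mimalloc's actual code):

  #include <windows.h>
  #include <stdbool.h>

  // try a large-page allocation first, then fall back to regular pages
  static void* alloc_maybe_large(SIZE_T size, bool* is_large) {
    void* p = VirtualAlloc(NULL, size,
                           MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
                           PAGE_READWRITE);
    *is_large = (p != NULL);
    if (p == NULL) {
      // large pages need privilege and contiguous physical memory;
      // degrade gracefully to normal pages on failure
      p = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
    }
    return p;
  }
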
-      p = mi_win_virtual_allocx(addr, size, try_alignment, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE | flags);
+      *is_large = true;
+      p = mi_win_virtual_allocx(addr, size, try_alignment, flags | MEM_LARGE_PAGES);
       if (large_only) return p;
       // fall back to non-large page allocation on error (`p == NULL`).
       if (p == NULL) {
@@ -268,6 +270,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
     }
   }
   if (p == NULL) {
+    *is_large = ((flags&MEM_LARGE_PAGES) != 0);
     p = mi_win_virtual_allocx(addr, size, try_alignment, flags);
   }
   if (p == NULL) {
@@ -311,7 +314,7 @@ static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int pr
   return p;
 }

-static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only) {
+static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) {
   void* p = NULL;
 #if !defined(MAP_ANONYMOUS)
 #define MAP_ANONYMOUS MAP_ANON
@@ -333,7 +336,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
     // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99)
     fd = VM_MAKE_TAG(100);
 #endif
-  if (large_only || use_large_os_page(size, try_alignment)) {
+  if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) {
     static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0;
     uintptr_t try_ok = mi_atomic_read(&large_page_try_ok);
     if (!large_only && try_ok > 0) {
@@ -368,6 +371,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
 #endif
       if (large_only || lflags != flags) {
         // try large OS page allocation
+        *is_large = true;
         p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd);
@@ -384,6 +388,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
     }
   }
   if (p == NULL) {
+    *is_large = false;
     p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd);
 #if defined(MADV_HUGEPAGE)
     // Many Linux systems don't allow MAP_HUGETLB but they support instead
     // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE
     // though since properly aligned allocations will already use large pages if available
     // in that case -- in particular for our large regions (in `memory.c`).
     // However, some systems only allow THP if called with explicit `madvise`, so
     // when large OS pages are enabled for mimalloc, we call `madvise` anyways.
-    if (use_large_os_page(size, try_alignment)) {
-      madvise(p, size, MADV_HUGEPAGE);
+    if (allow_large && use_large_os_page(size, try_alignment)) {
+      if (madvise(p, size, MADV_HUGEPAGE) == 0) {
+        *is_large = true; // possibly
+      };
     }
 #endif
   }

// Primitive allocation from the OS.
// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
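
Aside: the same graceful degradation shown standalone for Linux: ask for 1GiB pages, retry with 2MiB pages, then fall back to normal pages. A sketch under the assumption that `size` is a multiple of the requested page size and that hugetlb pages are reserved on the system (e.g. via /proc/sys/vm/nr_hugepages):

  #include <sys/mman.h>

  #ifndef MAP_HUGE_SHIFT
  #define MAP_HUGE_SHIFT 26
  #endif
  #ifndef MAP_HUGE_2MB
  #define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)   // log2(2MiB) encoded in the flags
  #endif
  #ifndef MAP_HUGE_1GB
  #define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)   // log2(1GiB) encoded in the flags
  #endif

  // try 1GiB pages, then 2MiB pages, then regular pages (Linux-specific)
  static void* mmap_huge(size_t size) {
    const int prot = PROT_READ | PROT_WRITE;
    const int base = MAP_PRIVATE | MAP_ANONYMOUS;
    void* p = mmap(NULL, size, prot, base | MAP_HUGETLB | MAP_HUGE_1GB, -1, 0);
    if (p == MAP_FAILED) p = mmap(NULL, size, prot, base | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0);
    if (p == MAP_FAILED) p = mmap(NULL, size, prot, base, -1, 0);  // regular pages
    return (p == MAP_FAILED ? NULL : p);
  }
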
-static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, mi_stats_t* stats) { +static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); if (size == 0) return NULL; + if (!commit) allow_large = false; - void* p = mi_os_alloc_from_huge_reserved(size, try_alignment, commit); - if (p != NULL) return p; + void* p = NULL; + if (allow_large) { + p = mi_os_alloc_from_huge_reserved(size, try_alignment, commit); + if (p != NULL) { + *is_large = true; + return p; + } + } #if defined(_WIN32) int flags = MEM_RESERVE; if (commit) flags |= MEM_COMMIT; - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false); + p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); #elif defined(__wasi__) + *is_large = false; p = mi_wasm_heap_grow(size, try_alignment); #else int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false); + p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); #endif _mi_stat_increase(&stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); - if (commit) _mi_stat_increase(&stats->committed, size); + if (commit) { _mi_stat_increase(&stats->committed, size); } } return p; } @@ -431,14 +446,15 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, mi_ // Primitive aligned allocation from the OS. // This function guarantees the allocated memory is aligned. -static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, mi_stats_t* stats) { +static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + if (!commit) allow_large = false; if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; size = _mi_align_up(size, _mi_os_page_size()); // try first with a hint (this will be aligned directly on Win 10+ or BSD) - void* p = mi_os_mem_alloc(size, alignment, commit, stats); + void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats); if (p == NULL) return NULL; // if not aligned, free it, overallocate, and unmap around it @@ -457,7 +473,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, if (commit) flags |= MEM_COMMIT; for (int tries = 0; tries < 3; tries++) { // over-allocate to determine a virtual memory range - p = mi_os_mem_alloc(over_size, alignment, commit, stats); + p = mi_os_mem_alloc(over_size, alignment, commit, false, is_large, stats); if (p == NULL) return NULL; // error if (((uintptr_t)p % alignment) == 0) { // if p happens to be aligned, just decommit the left-over area @@ -468,7 +484,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // otherwise free and allocate at an aligned address in there mi_os_mem_free(p, over_size, stats); void* aligned_p = mi_align_up_ptr(p, alignment); - p = mi_win_virtual_alloc(aligned_p, size, alignment, flags, false); + p = mi_win_virtual_alloc(aligned_p, size, alignment, flags, false, allow_large, is_large); if (p == aligned_p) break; // success! if (p != NULL) { // should not happen? 
mi_os_mem_free(p, size, stats); @@ -478,7 +494,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, } #else // overallocate... - p = mi_os_mem_alloc(over_size, alignment, commit, stats); + p = mi_os_mem_alloc(over_size, alignment, commit, false, is_large, stats); if (p == NULL) return NULL; // and selectively unmap parts around the over-allocated area. void* aligned_p = mi_align_up_ptr(p, alignment); @@ -504,7 +520,8 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, void* _mi_os_alloc(size_t size, mi_stats_t* stats) { if (size == 0) return NULL; size = mi_os_good_alloc_size(size, 0); - return mi_os_mem_alloc(size, 0, true, stats); + bool is_large = false; + return mi_os_mem_alloc(size, 0, true, false, &is_large, stats); } void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { @@ -513,12 +530,17 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { mi_os_mem_free(p, size, stats); } -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, mi_os_tld_t* tld) +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld) { if (size == 0) return NULL; size = mi_os_good_alloc_size(size, alignment); alignment = _mi_align_up(alignment, _mi_os_page_size()); - return mi_os_mem_alloc_aligned(size, alignment, commit, tld->stats); + bool allow_large = false; + if (large != NULL) { + allow_large = *large; + *large = false; + } + return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), tld->stats); } @@ -559,7 +581,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ // page align in the range, commit liberally, decommit conservative size_t csize; void* start = mi_os_page_align_areax(conservative, addr, size, &csize); - if (csize == 0 || mi_os_is_huge_reserved(addr)) return true; + if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; int err = 0; if (commit) { _mi_stat_increase(&stats->committed, csize); @@ -611,7 +633,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // page align conservatively within the range size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); - if (csize == 0 || mi_os_is_huge_reserved(addr)) return true; + if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; if (reset) _mi_stat_increase(&stats->reset, csize); else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! 
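
Aside: "reset" here means telling the OS that the pages' contents are disposable without unmapping them. On Windows that is MEM_RESET (used in the next hunk); on Unix the closest equivalents are the madvise hints, preferring MADV_FREE (lazy and cheap) with MADV_DONTNEED as the fallback (drops pages immediately; the next access faults in zeros). A minimal sketch of that preference order:

  #include <stddef.h>
  #include <sys/mman.h>

  // hint that [p, p+size) no longer needs its contents; the mapping stays valid
  static int os_reset(void* p, size_t size) {
  #if defined(MADV_FREE)
    if (madvise(p, size, MADV_FREE) == 0) return 0;  // pages reclaimed lazily
  #endif
    // immediate release; subsequent reads return zero-filled pages
    return madvise(p, size, MADV_DONTNEED);
  }
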
@@ -626,6 +648,11 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
   // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory
   void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE);
   mi_assert_internal(p == start);
+  #if 0
+  if (p == start) {
+    VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set
+  }
+  #endif
   if (p != start) return false;
 #else
 #if defined(MADV_FREE)
@@ -679,8 +706,8 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) {
   size_t csize = 0;
   void* start = mi_os_page_align_area_conservative(addr, size, &csize);
   if (csize == 0) return false;
-  if (mi_os_is_huge_reserved(addr)) {
-    _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
+  if (_mi_os_is_huge_reserved(addr)) {
+    _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
   }
   int err = 0;
 #ifdef _WIN32
@@ -742,7 +769,7 @@ typedef struct mi_huge_info_s {
 static mi_huge_info_t os_huge_reserved = { NULL, 0, ATOMIC_VAR_INIT(0) };

-static bool mi_os_is_huge_reserved(void* p) {
+bool _mi_os_is_huge_reserved(void* p) {
   return (mi_atomic_read_ptr(&os_huge_reserved.start) != NULL &&
           p >= mi_atomic_read_ptr(&os_huge_reserved.start) &&
           (uint8_t*)p < (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + mi_atomic_read(&os_huge_reserved.reserved));
@@ -806,10 +833,11 @@ int mi_reserve_huge_os_pages( size_t pages, double max_secs ) mi_attr_noexcept
   for (size_t page = 0; page < pages; page++, addr += MI_HUGE_OS_PAGE_SIZE ) {
     // allocate large pages
     void* p = NULL;
+    bool is_large = true;
 #ifdef _WIN32
-    p = mi_win_virtual_alloc(addr, MI_HUGE_OS_PAGE_SIZE, 0, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE, true);
+    p = mi_win_virtual_alloc(addr, MI_HUGE_OS_PAGE_SIZE, 0, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE, true, true, &is_large);
 #elif defined(MI_OS_USE_MMAP)
-    p = mi_unix_mmap(addr, MI_HUGE_OS_PAGE_SIZE, 0, PROT_READ | PROT_WRITE, true);
+    p = mi_unix_mmap(addr, MI_HUGE_OS_PAGE_SIZE, 0, PROT_READ | PROT_WRITE, true, true, &is_large);
 #else
     // always fail
 #endif
diff --git a/src/segment.c b/src/segment.c
index 9a744ea6..020d53e8 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -229,6 +229,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
   segment->thread_id = 0;
   mi_segments_track_size(-((long)segment_size),tld);
   if (mi_option_is_enabled(mi_option_secure)) {
+    mi_assert_internal(!segment->mem_is_fixed);
     _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set
   }
   _mi_mem_free(segment, segment_size, segment->memid, tld->stats);
@@ -277,7 +278,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld)
     return false;
   }
   mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE);
-  if (mi_option_is_enabled(mi_option_cache_reset)) {
+  if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) {
     _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats);
   }
   segment->next = tld->cache;
@@ -325,11 +326,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
   size_t page_size = (page_kind == MI_PAGE_HUGE ?
segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM); + bool eager = mi_option_is_enabled(mi_option_eager_commit); + bool commit = eager || (page_kind > MI_PAGE_MEDIUM); bool protection_still_good = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { if (mi_option_is_enabled(mi_option_secure)) { + mi_assert_internal(!segment->mem_is_fixed); if (segment->page_kind != page_kind) { _mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs } @@ -337,37 +340,38 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, protection_still_good = true; // otherwise, the guard pages are still in place } } - if (!mi_option_is_enabled(mi_option_eager_commit)) { - if (page_kind > MI_PAGE_MEDIUM) { - _mi_mem_commit(segment, segment->segment_size, tld->stats); - } - else { - // ok, commit (and unreset) on demand again - } + if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) { + mi_assert_internal(!segment->mem_is_fixed); + _mi_mem_commit(segment, segment->segment_size, tld->stats); + segment->mem_is_committed = true; } - else if (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset)) { + if (!segment->mem_is_fixed && + (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) { _mi_mem_unreset(segment, segment->segment_size, tld->stats); } } else { // Allocate the segment from the OS size_t memid; - segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &memid, os_tld); + bool mem_large = (eager && !mi_option_is_enabled(mi_option_secure)); // only allow large OS pages once we are no longer lazy + segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { + // ensure the initial info is committed _mi_mem_commit(segment, info_size, tld->stats); } segment->memid = memid; + segment->mem_is_fixed = mem_large; + segment->mem_is_committed = commit; mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - // zero the segment info - { size_t memid = segment->memid; - memset(segment, 0, info_size); - segment->memid = memid; - } + // zero the segment info (but not the `mem` fields) + ptrdiff_t ofs = offsetof(mi_segment_t,next); + memset((uint8_t*)segment + ofs, 0, info_size - ofs); + // guard pages if (mi_option_is_enabled(mi_option_secure) && !protection_still_good) { // in secure mode, we set up a protected page in between the segment info // and the page data @@ -386,6 +390,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } } + // initialize segment->page_kind = page_kind; segment->capacity = capacity; segment->page_shift = page_shift; @@ -453,13 +458,14 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) if (!page->segment_in_use) { if (page->is_reset || !page->is_committed) { size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - mi_assert_internal(!(page->is_reset && !page->is_committed)); + uint8_t* start = _mi_page_start(segment, page, &psize); if (!page->is_committed) { + mi_assert_internal(!segment->mem_is_fixed); page->is_committed = true; 
_mi_mem_commit(start,psize,stats); } if (page->is_reset) { + mi_assert_internal(!segment->mem_is_fixed); page->is_reset = false; _mi_mem_unreset(start, psize, stats); } @@ -488,22 +494,17 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_sta _mi_stat_decrease(&stats->pages, 1); // reset the page memory to reduce memory pressure? - if (!page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { + if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; _mi_mem_reset(start, psize, stats); } - // zero the page data - uint8_t idx = page->segment_idx; // don't clear the index - bool is_reset = page->is_reset; // don't clear the reset flag - bool is_committed = page->is_committed; // don't clear the commit flag - memset(page, 0, sizeof(*page)); - page->segment_idx = idx; + // zero the page data, but not the segment fields + ptrdiff_t ofs = offsetof(mi_page_t,capacity); + memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); page->segment_in_use = false; - page->is_reset = is_reset; - page->is_committed = is_committed; segment->used--; } From b72a2d9659216dcf352a69287f39fec7798d305d Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 27 Aug 2019 18:43:50 +0100 Subject: [PATCH 30/40] macOS: anonymous page ID make it as env var. --- include/mimalloc.h | 1 + src/options.c | 3 ++- src/os.c | 4 +++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 4f13bc1f..41514d3e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -232,6 +232,7 @@ typedef enum mi_option_e { mi_option_page_reset, mi_option_cache_reset, mi_option_reset_decommits, + mi_option_os_tag, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 16c50f11..5f2ad896 100644 --- a/src/options.c +++ b/src/options.c @@ -68,7 +68,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(cache_reset) }, - { 0, UNINIT, MI_OPTION(reset_decommits) } // note: cannot enable this if secure is on + { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on + { 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index fb36f3fc..0a733aa3 100644 --- a/src/os.c +++ b/src/os.c @@ -288,7 +288,9 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) #endif #if defined(VM_MAKE_TAG) // macOS: tracking anonymous page with a specific ID. 
(All up to 98 are taken officially but LLVM sanitizers had taken 99)
-  fd = VM_MAKE_TAG(100);
+  int os_tag = (int)mi_option_get(mi_option_os_tag);
+  if (os_tag < 100 || os_tag > 255) os_tag = 100;
+  fd = VM_MAKE_TAG(os_tag);
 #endif
   if (use_large_os_page(size, try_alignment)) {
     static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0;
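
Aside: the os_tag option feeds into Mach's VM tagging: on macOS, the fd argument of an anonymous mmap may carry VM_MAKE_TAG(tag), and tools such as vmmap then group the allocator's memory under that tag. A standalone sketch of a tagged anonymous mapping (the env-var name MIMALLOC_OS_TAG below follows mimalloc's usual MIMALLOC_ option naming and is an assumption here):

  #include <sys/mman.h>
  #include <mach/vm_statistics.h>

  // anonymous mapping labeled with a user tag; mimalloc clamps tags to 100..255
  static void* mmap_tagged(size_t size, int tag) {
    if (tag < 100 || tag > 255) tag = 100;  // same clamp as in the patch above
    return mmap(NULL, size, PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANON, VM_MAKE_TAG(tag), 0);
  }

With the option in place, something like `MIMALLOC_OS_TAG=240 ./app` should make the heap show up under tag 240 in vmmap output.
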
From 18e02c3766d9fd8fc47fef2f0346645487d967ff Mon Sep 17 00:00:00 2001
From: daan
Date: Tue, 27 Aug 2019 17:02:56 -0700
Subject: [PATCH 31/40] try allocating non-eager segments in non-fixed memory

---
 src/memory.c  | 24 ++++++++++++++++++------
 src/options.c |  2 +-
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/memory.c b/src/memory.c
index a9b87b8e..f53b1ec3 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -262,7 +262,7 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
     mi_assert_internal((m >> bitidx) == mask); // no overflow?
     uintptr_t newmap = map | m;
     mi_assert_internal((newmap^map) >> bitidx == mask);
-    if (!mi_atomic_cas_weak(&region->map, newmap, map)) {
+    if (!mi_atomic_cas_weak(&region->map, newmap, map)) {  // TODO: use strong cas here?
       // no success, another thread claimed concurrently.. keep going
       map = mi_atomic_read(&region->map);
       continue;
     }
     else {
@@ -299,12 +299,24 @@ static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, b
   mi_assert_internal(idx < MI_REGION_MAX);
   mem_region_t* region = &regions[idx];
   uintptr_t m = mi_atomic_read_relaxed(&region->map);
-  if (m != MI_REGION_MAP_FULL) {  // some bits are zero
-    return mi_region_alloc_blocks(region, idx, blocks, size, commit, large, p, id, tld);
-  }
-  else {
-    return true; // no error, but no success either
+  if (m != MI_REGION_MAP_FULL) {  // some bits are zero
+    bool ok = (commit || *large); // committing or allow-large is always ok
+    if (!ok) {
+      // otherwise skip incompatible regions if possible.
+      // this is not guaranteed due to multiple threads allocating at the same time but
+      // that's ok. In secure mode, large is never allowed so that works out; otherwise
+      // we might just not be able to reset/decommit individual pages sometimes.
+      mi_region_info_t info = mi_atomic_read_relaxed(&region->info);
+      bool is_large;
+      bool is_committed;
+      void* start = mi_region_info_read(info,&is_large,&is_committed);
+      ok = (start == NULL || (commit || !is_committed) || (*large || !is_large)); // Todo: test with one bitmap operation?
+    }
+    if (ok) {
+      return mi_region_alloc_blocks(region, idx, blocks, size, commit, large, p, id, tld);
+    }
   }
+  return true; // no error, but no success either
 }

diff --git a/src/options.c b/src/options.c
index 1d030830..1076ce1e 100644
--- a/src/options.c
+++ b/src/options.c
@@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] =
   // the following options are experimental and not all combinations make sense.
   { 1, UNINIT, MI_OPTION(eager_commit) },        // note: needs to be on when eager_region_commit is enabled
 #ifdef _WIN32   // and BSD?
-  { 1, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...)
+  { 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...)
 #else
   { 1, UNINIT, MI_OPTION(eager_region_commit) },
 #endif

From a551f3abc470695335bf7368012fe401260bde56 Mon Sep 17 00:00:00 2001
From: daan
Date: Tue, 27 Aug 2019 18:08:03 -0700
Subject: [PATCH 32/40] more precise commit statistics

---
 src/memory.c | 12 +++++++++---
 src/os.c     | 26 +++++++++++++++-----------
 src/stats.c  |  1 +
 3 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/src/memory.c b/src/memory.c
index f53b1ec3..3a465d86 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -47,6 +47,7 @@ bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
 bool _mi_os_unreset(void* p, size_t size, mi_stats_t* stats);
 void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
 bool _mi_os_is_huge_reserved(void* p);
+void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);

 // Constants
 #if (MI_INTPTR_SIZE==8)
@@ -179,7 +180,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
     }
     if (start != NULL) {
       // free it if we didn't succeed to save it to some other region
-      _mi_os_free(start, MI_REGION_SIZE, tld->stats);
+      _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats);
     }
     // and continue with the memory at our index
     info = mi_atomic_read(&region->info);
@@ -426,6 +427,10 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
     // _mi_os_reset(p,size,stats);
     // _mi_os_decommit(p,size,stats); // if !is_committed
   }
+  if (!is_eager_committed) {
+    // adjust commit statistics as we commit again when re-using the same slot
+    _mi_stat_decrease(&stats->committed, mi_good_commit_size(size));
+  }

   // TODO: should we free empty regions? currently only done _mi_mem_collect.
   // this frees up virtual address space which might be useful on 32-bit systems?

   // and unclaim
   uintptr_t map;
@@ -456,9 +461,10 @@ void _mi_mem_collect(mi_stats_t* stats) {
       } while(m == 0 && !mi_atomic_cas_weak(&region->map, ~((uintptr_t)0), 0 ));
       if (m == 0) {
         // on success, free the whole region (unless it was huge reserved)
-        void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, NULL);
+        bool is_eager_committed;
+        void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, &is_eager_committed);
         if (start != NULL && !_mi_os_is_huge_reserved(start)) {
-          _mi_os_free(start, MI_REGION_SIZE, stats);
+          _mi_os_free_ex(start, MI_REGION_SIZE, is_eager_committed, stats);
         }
         // and release
         mi_atomic_write(&region->info,0);
         mi_atomic_write(&region->map,0);
diff --git a/src/os.c b/src/os.c
index 566d204d..76778123 100644
--- a/src/os.c
+++ b/src/os.c
@@ -170,7 +170,7 @@ void _mi_os_init() {
   Raw allocation on Windows (VirtualAlloc) and Unix's (mmap).
----------------------------------------------------------- */

-static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
+static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats)
 {
   if (addr == NULL || size == 0 || _mi_os_is_huge_reserved(addr)) return true;
   bool err = false;
@@ -181,7 +181,7 @@ static bool mi_os_mem_free(void* addr, size_t size, mi_stats_t* stats)
 #else
   err = (munmap(addr, size) == -1);
 #endif
-  _mi_stat_decrease(&stats->committed, size); // TODO: what if never committed?
+ if (was_committed) _mi_stat_decrease(&stats->committed, size); _mi_stat_decrease(&stats->reserved, size); if (err) { #pragma warning(suppress:4996) @@ -461,7 +461,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // if not aligned, free it, overallocate, and unmap around it if (((uintptr_t)p % alignment != 0)) { - mi_os_mem_free(p, size, stats); + mi_os_mem_free(p, size, commit, stats); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow size_t over_size = size + alignment; @@ -484,12 +484,12 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, } else { // otherwise free and allocate at an aligned address in there - mi_os_mem_free(p, over_size, stats); + mi_os_mem_free(p, over_size, commit, stats); void* aligned_p = mi_align_up_ptr(p, alignment); p = mi_win_virtual_alloc(aligned_p, size, alignment, flags, false, allow_large, is_large); if (p == aligned_p) break; // success! if (p != NULL) { // should not happen? - mi_os_mem_free(p, size, stats); + mi_os_mem_free(p, size, commit, stats); p = NULL; } } @@ -504,8 +504,8 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t mid_size = _mi_align_up(size, _mi_os_page_size()); size_t post_size = over_size - pre_size - mid_size; mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size); - if (pre_size > 0) mi_os_mem_free(p, pre_size, stats); - if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, stats); + if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); + if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); // we can return the aligned pointer on `mmap` systems p = aligned_p; #endif @@ -526,10 +526,14 @@ void* _mi_os_alloc(size_t size, mi_stats_t* stats) { return mi_os_mem_alloc(size, 0, true, false, &is_large, stats); } -void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { +void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats) { if (size == 0 || p == NULL) return; size = mi_os_good_alloc_size(size, 0); - mi_os_mem_free(p, size, stats); + mi_os_mem_free(p, size, was_committed, stats); +} + +void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { + _mi_os_free_ex(p, size, true, stats); } void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld) @@ -650,7 +654,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); - #if 0 + #if 1 if (p == start) { VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set } @@ -753,7 +757,7 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { // we cannot shrink on windows, but we can decommit return _mi_os_decommit(start, size, stats); #else - return mi_os_mem_free(start, size, stats); + return mi_os_mem_free(start, size, true, stats); #endif } diff --git a/src/stats.c b/src/stats.c index 075234b8..292bc84b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -85,6 +85,7 @@ static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64 mi_atomic_add64( &stat->allocated, src->allocated * unit); mi_atomic_add64( &stat->current, src->current * unit); mi_atomic_add64( &stat->freed, src->freed * unit); + // peak scores 
do not work across threads.. mi_atomic_add64( &stat->peak, src->peak * unit); } From 9af51506a65f3597ca2b183ebcde1491e3271ed8 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 28 Aug 2019 11:58:30 -0700 Subject: [PATCH 33/40] track commit and is_large status more precisely --- include/mimalloc-internal.h | 3 +- include/mimalloc.h | 2 ++ src/memory.c | 65 +++++++++++++++++++++++-------------- src/options.c | 2 ++ src/os.c | 15 ++++----- src/segment.c | 8 ++--- 6 files changed, 57 insertions(+), 38 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 9b3a3907..97619765 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -45,8 +45,7 @@ void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocat void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data // memory.c -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, size_t* id, mi_os_tld_t* tld); -void* _mi_mem_alloc(size_t size, bool commit, bool* large, size_t* id, mi_os_tld_t* tld); +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, size_t* id, mi_os_tld_t* tld); void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats); diff --git a/include/mimalloc.h b/include/mimalloc.h index 0357d633..5cec05fa 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -225,11 +225,13 @@ typedef enum mi_option_e { mi_option_verbose, // the following options are experimental mi_option_secure, + mi_option_lazy_commit, mi_option_eager_commit, mi_option_eager_region_commit, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_segment_cache, + mi_option_segment_reset, mi_option_page_reset, mi_option_cache_reset, mi_option_reset_decommits, diff --git a/src/memory.c b/src/memory.c index 3a465d86..d8cb204e 100644 --- a/src/memory.c +++ b/src/memory.c @@ -46,8 +46,9 @@ bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -bool _mi_os_is_huge_reserved(void* p); void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); +void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); +bool _mi_os_is_huge_reserved(void* p); // Constants #if (MI_INTPTR_SIZE==8) @@ -137,7 +138,7 @@ Commit from a region // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written // if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). 
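Throughout this patch, `commit` becomes an in/out parameter: the caller passes in whether it wants committed memory, and the callee writes back the commit state it actually delivered (memory served from the reserved huge-page area is always committed, even when lazy commit was requested), as in the reworked `mi_region_commit_blocks` below. A minimal caller sketch under that convention -- illustrative only, assuming the internal headers and an `os_tld` are in scope:

    // Hedged sketch: request lazily-committed segment memory and record
    // whether the allocator upgraded the commit flag on the way out.
    static void* segment_mem_alloc(mi_os_tld_t* os_tld, bool* is_committed) {
      bool commit = false;     // in: lazy commit requested
      bool large  = false;     // in: regular OS pages are fine
      size_t memid = SIZE_MAX;
      void* p = _mi_mem_alloc_aligned(MI_SEGMENT_SIZE, MI_SEGMENT_SIZE,
                                      &commit, &large, &memid, os_tld);
      *is_committed = commit;  // out: true if already committed (e.g. huge reserved pages)
      return p;
    }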
-static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, bool commit, bool* large, void** p, size_t* id, mi_os_tld_t* tld) +static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, bool* commit, bool* allow_large, void** p, size_t* id, mi_os_tld_t* tld) { size_t mask = mi_region_block_mask(blocks,bitidx); mi_assert_internal(mask != 0); @@ -149,9 +150,16 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit if (info == 0) { bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); - bool region_large = region_commit && *large; - void* start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, ®ion_large, tld); - *large = region_large; + bool region_large = *allow_large; + void* start = NULL; + if (region_large) { + start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN); + if (start != NULL) { region_commit = true; } + } + if (start == NULL) { + start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, ®ion_large, tld); + } + mi_assert_internal(!(region_large && !*allow_large)); if (start == NULL) { // failure to allocate from the OS! unclaim the blocks and fail @@ -191,13 +199,22 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit // Commit the blocks to memory bool region_is_committed = false; - void* start = mi_region_info_read(info,large,®ion_is_committed); + bool region_is_large = false; + void* start = mi_region_info_read(info,®ion_is_large,®ion_is_committed); + mi_assert_internal(!(region_is_large && !*allow_large)); + void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); - if (commit && !region_is_committed) { + if (*commit && !region_is_committed) { + // ensure commit _mi_os_commit(blocks_start, mi_good_commit_size(size), tld->stats); // only commit needed size (unless using large OS pages) } + else if (!*commit && region_is_committed) { + // but even when no commit is requested, we might have committed anyway (in a huge OS page for example) + *commit = true; + } - // and return the allocation + // and return the allocation + *allow_large = region_is_large; *p = blocks_start; *id = (idx*MI_REGION_MAP_BITS) + bitidx; return true; @@ -241,7 +258,7 @@ static inline size_t mi_bsr(uintptr_t x) { // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written // if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). 
-static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool commit, bool* large, void** p, size_t* id, mi_os_tld_t* tld) +static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool* commit, bool* allow_large, void** p, size_t* id, mi_os_tld_t* tld) { mi_assert_internal(p != NULL && id != NULL); mi_assert_internal(blocks < MI_REGION_MAP_BITS); @@ -271,7 +288,7 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc else { // success, we claimed the bits // now commit the block memory -- this can still fail - return mi_region_commit_blocks(region, idx, bitidx, blocks, size, commit, large, p, id, tld); + return mi_region_commit_blocks(region, idx, bitidx, blocks, size, commit, allow_large, p, id, tld); } } else { @@ -294,27 +311,27 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written // if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, bool commit, bool* large, void** p, size_t* id, mi_os_tld_t* tld) +static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, bool* commit, bool* allow_large, void** p, size_t* id, mi_os_tld_t* tld) { // check if there are available blocks in the region.. mi_assert_internal(idx < MI_REGION_MAX); mem_region_t* region = ®ions[idx]; uintptr_t m = mi_atomic_read_relaxed(®ion->map); if (m != MI_REGION_MAP_FULL) { // some bits are zero - bool ok = (commit || *large); // committing or allow-large is always ok + bool ok = (*commit || *allow_large); // committing or allow-large is always ok if (!ok) { // otherwise skip incompatible regions if possible. // this is not guaranteed due to multiple threads allocating at the same time but - // that's ok. In secure mode, large is never allowed so that works out; otherwise - // we might just not be able to reset/decommit individual pages sometimes. + // that's ok. In secure mode, large is never allowed for any thread, so that works out; + // otherwise we might just not be able to reset/decommit individual pages sometimes. mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); bool is_large; bool is_committed; void* start = mi_region_info_read(info,&is_large,&is_committed); - ok = (start == NULL || (commit || !is_committed) || (*large || !is_large)); // Todo: test with one bitmap operation? + ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large)); // Todo: test with one bitmap operation? } if (ok) { - return mi_region_alloc_blocks(region, idx, blocks, size, commit, large, p, id, tld); + return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, p, id, tld); } } return true; // no error, but no success either @@ -326,7 +343,7 @@ static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, b // Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. 
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, size_t* id, mi_os_tld_t* tld) +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, size_t* id, mi_os_tld_t* tld) { mi_assert_internal(id != NULL && tld != NULL); mi_assert_internal(size > 0); @@ -336,7 +353,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool* la // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`) if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) { - return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, commit, large, tld); // round up size + return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, *commit, large, tld); // round up size } // always round size to OS page size multiple (so commit/decommit go over the entire range) @@ -371,6 +388,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool* la } else { tld->region_idx = idx; // next start of search? + } mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); @@ -378,10 +396,6 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool* la } -// Allocate `size` memory. Return non NULL on success, with a given memory `id`. -void* _mi_mem_alloc(size_t size, bool commit, bool* large, size_t* id, mi_os_tld_t* tld) { - return _mi_mem_alloc_aligned(size,0,commit,large,id,tld); -} /* ---------------------------------------------------------------------------- Free @@ -424,8 +438,11 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // if the memory is reused soon. // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large if (!is_large) { - // _mi_os_reset(p,size,stats); - // _mi_os_decommit(p,size,stats); // if !is_committed + if (mi_option_is_enabled(mi_option_segment_reset)) { + _mi_os_reset(p, size, stats); + // _mi_os_decommit(p,size,stats); // if !is_eager_committed + } + // else { _mi_os_reset(p,size,stats); } } if (!is_eager_committed) { // adjust commit statistics as we commit again when re-using the same slot diff --git a/src/options.c b/src/options.c index 1076ce1e..e5c0c96a 100644 --- a/src/options.c +++ b/src/options.c @@ -58,6 +58,7 @@ static mi_option_desc_t options[_mi_option_last] = #endif // the following options are experimental and not all combinations make sense. + { 0, UNINIT, MI_OPTION(lazy_commit) }, // the first N segments per thread are lazily committed { 1, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled #ifdef _WIN32 // and BSD? { 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) @@ -67,6 +68,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread + { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free { 0, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(cache_reset) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on diff --git a/src/os.c b/src/os.c index 76778123..58abafe0 100644 --- a/src/os.c +++ b/src/os.c @@ -35,9 +35,9 @@ terms of the MIT license. 
A copy of the license can be found in the file On windows initializes support for aligned allocation and large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). ----------------------------------------------------------- */ -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -bool _mi_os_is_huge_reserved(void* p); -static void* mi_os_alloc_from_huge_reserved(size_t size, size_t try_alignment, bool commit); +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); +bool _mi_os_is_huge_reserved(void* p); +void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); static void* mi_align_up_ptr(void* p, size_t alignment) { return (void*)_mi_align_up((uintptr_t)p, alignment); @@ -418,8 +418,8 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo if (!commit) allow_large = false; void* p = NULL; - if (allow_large) { - p = mi_os_alloc_from_huge_reserved(size, try_alignment, commit); + if (commit && allow_large) { + p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); if (p != NULL) { *is_large = true; return p; @@ -781,12 +781,11 @@ bool _mi_os_is_huge_reserved(void* p) { (uint8_t*)p < (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + mi_atomic_read(&os_huge_reserved.reserved)); } -static void* mi_os_alloc_from_huge_reserved(size_t size, size_t try_alignment, bool commit) +void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment) { // only allow large aligned allocations if (size < MI_SEGMENT_SIZE || (size % MI_SEGMENT_SIZE) != 0) return NULL; - if (try_alignment > MI_SEGMENT_SIZE) return NULL; - if (!commit) return NULL; + if (try_alignment > MI_SEGMENT_SIZE) return NULL; if (mi_atomic_read_ptr(&os_huge_reserved.start)==NULL) return NULL; if (mi_atomic_read(&os_huge_reserved.used) >= mi_atomic_read(&os_huge_reserved.reserved)) return NULL; // already full diff --git a/src/segment.c b/src/segment.c index 020d53e8..441d79b8 100644 --- a/src/segment.c +++ b/src/segment.c @@ -326,8 +326,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool eager = mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager || (page_kind > MI_PAGE_MEDIUM); + bool lazy = (tld->count < mi_option_get(mi_option_lazy_commit)); + bool commit = (!lazy && mi_option_is_enabled(mi_option_eager_commit)) || (page_kind > MI_PAGE_MEDIUM); bool protection_still_good = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { @@ -353,8 +353,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, else { // Allocate the segment from the OS size_t memid; - bool mem_large = (eager && !mi_option_is_enabled(mi_option_secure)); // only allow large OS pages once we are no longer lazy - segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, &memid, os_tld); + bool mem_large = (!lazy && !mi_option_is_enabled(mi_option_secure)); // only allow large OS pages once we are no longer lazy + segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { // ensure the initial info is committed From d381fcd9fa3b70778cda8894476886645778e3da Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 28 Aug 2019 12:09:23 -0700 Subject: [PATCH 34/40] rename lazy to eager_commit_delay --- include/mimalloc.h | 4 ++-- src/options.c | 4 ++-- src/segment.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 5cec05fa..15d06bef 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -225,16 +225,16 @@ typedef enum mi_option_e { mi_option_verbose, // the following options are experimental mi_option_secure, - mi_option_lazy_commit, mi_option_eager_commit, mi_option_eager_region_commit, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_segment_cache, - mi_option_segment_reset, mi_option_page_reset, mi_option_cache_reset, mi_option_reset_decommits, + mi_option_eager_commit_delay, + mi_option_segment_reset, mi_option_os_tag, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index e5c0c96a..8654550e 100644 --- a/src/options.c +++ b/src/options.c @@ -58,7 +58,6 @@ static mi_option_desc_t options[_mi_option_last] = #endif // the following options are experimental and not all combinations make sense. - { 0, UNINIT, MI_OPTION(lazy_commit) }, // the first N segments per thread are lazily committed { 1, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled #ifdef _WIN32 // and BSD? { 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) 
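In practice the renamed knob is set like any other mimalloc option, either through the environment (options are read as `MIMALLOC_` plus the upper-cased option name) or programmatically before the first allocation. A small usage sketch; the value `4` is an arbitrary example:

    #include <mimalloc.h>

    int main(void) {
      // equivalent to running with MIMALLOC_EAGER_COMMIT_DELAY=4:
      // the first 4 segments per thread are not eagerly committed
      mi_option_set(mi_option_eager_commit_delay, 4);
      void* p = mi_malloc(64);
      mi_free(p);
      return 0;
    }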
@@ -68,10 +67,11 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free { 0, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(cache_reset) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free { 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose }; diff --git a/src/segment.c b/src/segment.c index 441d79b8..3777e060 100644 --- a/src/segment.c +++ b/src/segment.c @@ -326,8 +326,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool lazy = (tld->count < mi_option_get(mi_option_lazy_commit)); - bool commit = (!lazy && mi_option_is_enabled(mi_option_eager_commit)) || (page_kind > MI_PAGE_MEDIUM); + bool eager = mi_option_is_enabled(mi_option_eager_commit) && (tld->count < mi_option_get(mi_option_eager_commit_delay)); + bool commit = eager || (page_kind > MI_PAGE_MEDIUM); bool protection_still_good = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { @@ -353,7 +353,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, else { // Allocate the segment from the OS size_t memid; - bool mem_large = (!lazy && !mi_option_is_enabled(mi_option_secure)); // only allow large OS pages once we are no longer lazy + bool mem_large = (eager && !mi_option_is_enabled(mi_option_secure)); // only allow large OS pages once we are no longer lazy segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { From 154fd471a111baec5afd36a66b49d9b0850d392c Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 29 Aug 2019 07:48:15 -0700 Subject: [PATCH 35/40] fix comparison warning --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index 3777e060..d442d521 100644 --- a/src/segment.c +++ b/src/segment.c @@ -326,7 +326,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool eager = mi_option_is_enabled(mi_option_eager_commit) && (tld->count < mi_option_get(mi_option_eager_commit_delay)); + bool eager = mi_option_is_enabled(mi_option_eager_commit) && (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool commit = eager || (page_kind > MI_PAGE_MEDIUM); bool protection_still_good = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); From e8c750585390c3da5bc71f60c9ca7339bb77e20f Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 29 Aug 2019 07:49:40 -0700 Subject: [PATCH 36/40] only set has_aligned flag if really necessary --- src/alloc-aligned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 24f6c440..97f4319f 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -43,10 +43,10 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* heap, size_t size, size_t if (p == NULL) return NULL; // .. and align within the allocation - mi_page_set_has_aligned( _mi_ptr_page(p), true ); uintptr_t adjust = alignment - (((uintptr_t)p + offset) % alignment); mi_assert_internal(adjust % sizeof(uintptr_t) == 0); void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust)); + if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); mi_assert_internal( p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p),_mi_ptr_page(aligned_p),aligned_p) ); return aligned_p; From 64c1d6de8688c5d53165a6eed5d2ed3613191863 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 29 Aug 2019 07:50:35 -0700 Subject: [PATCH 37/40] fix mi_likely branch that was marked as unlikely --- src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index f45f43e3..74c3d88e 100644 --- a/src/page.c +++ b/src/page.c @@ -180,7 +180,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // and the local free list if (page->local_free != NULL) { - if (mi_unlikely(page->free == NULL)) { + if (mi_likely(page->free == NULL)) { // usual case page->free = page->local_free; page->local_free = NULL; From 4b39c0b06edae29ca9b0bc0ca9e3938538a76a13 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 29 Aug 2019 07:55:57 -0700 Subject: [PATCH 38/40] fix eager_delay test, and allow large OS pages even without eager commit enabled --- src/segment.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index d442d521..b03547b3 100644 --- a/src/segment.c +++ b/src/segment.c @@ -326,7 +326,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool eager = mi_option_is_enabled(mi_option_eager_commit) && (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (page_kind > MI_PAGE_MEDIUM); bool protection_still_good = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); @@ -353,7 +354,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, else { // Allocate the segment from the OS size_t memid; - bool mem_large = (eager && !mi_option_is_enabled(mi_option_secure)); // only allow large OS pages once we are no longer lazy + bool mem_large = (!eager_delay && !mi_option_is_enabled(mi_option_secure)); // only allow large OS pages once we are no longer lazy segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { From 4819d3f78f5c803961faa94b51fbe6e7179365f7 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 29 Aug 2019 09:01:00 -0700 Subject: [PATCH 39/40] expose mi_stats_merge function --- include/mimalloc.h | 1 + src/stats.c | 34 +++++++++++++++++----------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 15d06bef..9fd455da 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -110,6 +110,7 @@ mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; mi_decl_export void mi_collect(bool force) mi_attr_noexcept; mi_decl_export void mi_stats_print(FILE* out) mi_attr_noexcept; mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; +mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; mi_decl_export int mi_version(void) mi_attr_noexcept; mi_decl_export void mi_process_init(void) mi_attr_noexcept; diff --git a/src/stats.c b/src/stats.c index 292bc84b..1ecc8b3a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -11,19 +11,6 @@ terms of the MIT license. A copy of the license can be found in the file #include // memset -/* ----------------------------------------------------------- - Merge thread statistics with the main one. 
------------------------------------------------------------ */ - -static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src); - -void _mi_stats_done(mi_stats_t* stats) { - if (stats == &_mi_stats_main) return; - mi_stats_add(&_mi_stats_main, stats); - memset(stats,0,sizeof(*stats)); -} - - /* ----------------------------------------------------------- Statistics operations ----------------------------------------------------------- */ @@ -294,6 +281,13 @@ static mi_stats_t* mi_stats_get_default(void) { return &heap->tld->stats; } +static void mi_stats_merge_from(mi_stats_t* stats) { + if (stats != &_mi_stats_main) { + mi_stats_add(&_mi_stats_main, stats); + memset(stats, 0, sizeof(mi_stats_t)); + } +} + void mi_stats_reset(void) mi_attr_noexcept { mi_stats_t* stats = mi_stats_get_default(); if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); } @@ -301,11 +295,17 @@ void mi_stats_reset(void) mi_attr_noexcept { mi_time_start = _mi_clock_start(); } +void mi_stats_merge(void) mi_attr_noexcept { + mi_stats_merge_from( mi_stats_get_default() ); +} + +void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` + mi_stats_merge_from(stats); +} + + static void mi_stats_print_ex(mi_stats_t* stats, double secs, FILE* out) { - if (stats != &_mi_stats_main) { - mi_stats_add(&_mi_stats_main,stats); - memset(stats,0,sizeof(mi_stats_t)); - } + mi_stats_merge_from(stats); _mi_stats_print(&_mi_stats_main, secs, out); } From 7bf12c7b5fbb2aa7c156360a145545c7a45be90f Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 29 Aug 2019 09:42:50 -0700 Subject: [PATCH 40/40] make output function configurable; remove from standard includes --- include/mimalloc-internal.h | 2 +- include/mimalloc.h | 16 +++++++---- src/alloc-override-win.c | 1 + src/options.c | 57 +++++++++++++++++++++++-------------- src/stats.c | 26 ++++++++--------- 5 files changed, 61 insertions(+), 41 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 97619765..3ddb734d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -22,7 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file // "options.c" -void _mi_fprintf(FILE* out, const char* fmt, ...); +void _mi_fprintf(mi_output_fun* out, const char* fmt, ...); void _mi_error_message(const char* fmt, ...); void _mi_warning_message(const char* fmt, ...); void _mi_verbose_message(const char* fmt, ...); diff --git a/include/mimalloc.h b/include/mimalloc.h index 9fd455da..ed75f617 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -69,8 +69,8 @@ terms of the MIT license. 
A copy of the license can be found in the file // Includes // ------------------------------------------------------ +#include // size_t #include // bool -#include // FILE #ifdef __cplusplus extern "C" { @@ -107,19 +107,23 @@ mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; +typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); +mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free) mi_attr_noexcept; + +typedef void (mi_output_fun)(const char* msg); +mi_decl_export void mi_register_output(mi_output_fun* out) mi_attr_noexcept; + mi_decl_export void mi_collect(bool force) mi_attr_noexcept; -mi_decl_export void mi_stats_print(FILE* out) mi_attr_noexcept; +mi_decl_export int mi_version(void) mi_attr_noexcept; mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; -mi_decl_export int mi_version(void) mi_attr_noexcept; +mi_decl_export void mi_stats_print(mi_output_fun* out) mi_attr_noexcept; mi_decl_export void mi_process_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_done(void) mi_attr_noexcept; -mi_decl_export void mi_thread_stats_print(FILE* out) mi_attr_noexcept; +mi_decl_export void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept; -typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); -mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free) mi_attr_noexcept; // ------------------------------------------------------ // Aligned allocation diff --git a/src/alloc-override-win.c b/src/alloc-override-win.c index 0bd05deb..dc4796ab 100644 --- a/src/alloc-override-win.c +++ b/src/alloc-override-win.c @@ -16,6 +16,7 @@ terms of the MIT license. A copy of the license can be found in the file #include #include // getenv +#include // _setmaxstdio #include // strstr diff --git a/src/options.c b/src/options.c index 8654550e..cd9b4e48 100644 --- a/src/options.c +++ b/src/options.c @@ -134,6 +134,32 @@ void mi_option_disable(mi_option_t option) { } +static void mi_out_stderr(const char* msg) { + #ifdef _WIN32 + // on windows with redirection, the C runtime cannot handle locale dependent output + // after the main thread closes so we use direct console output. + _cputs(msg); + #else + fputs(msg, stderr); + #endif +} + +// -------------------------------------------------------- +// Default output handler +// -------------------------------------------------------- + +static volatile _Atomic(mi_output_fun*) mi_out_default; // = NULL + +static mi_output_fun* mi_out_get_default(void) { + mi_output_fun* out = (mi_output_fun*)mi_atomic_read_ptr(mi_atomic_cast(void*, &mi_out_default)); + return (out == NULL ? &mi_out_stderr : out); +} + +void mi_register_output(mi_output_fun* out) mi_attr_noexcept { + mi_atomic_write_ptr(mi_atomic_cast(void*,&mi_out_default),out); +} + + // -------------------------------------------------------- // Messages // -------------------------------------------------------- @@ -146,31 +172,20 @@ static mi_decl_thread bool recurse = false; // Define our own limited `fprintf` that avoids memory allocation. // We do this using `snprintf` with a limited buffer. 
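Before the reworked `mi_vfprintf` below: with the callback registered, an application can route all of mimalloc's diagnostic and statistics output into its own logging. A minimal usage sketch:

    #include <mimalloc.h>
    #include <stdio.h>

    static void my_output(const char* msg) {
      // mimalloc hands over plain strings (possibly in several chunks);
      // forward each chunk to our own sink
      fprintf(stderr, "[mimalloc] %s", msg);
    }

    int main(void) {
      mi_register_output(&my_output);
      mi_stats_print(NULL);   // NULL falls back to the registered (or default) output
      return 0;
    }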
-static void mi_vfprintf( FILE* out, const char* prefix, const char* fmt, va_list args ) { +static void mi_vfprintf( mi_output_fun* out, const char* prefix, const char* fmt, va_list args ) { char buf[256]; if (fmt==NULL) return; if (_mi_preloading() || recurse) return; recurse = true; - if (out==NULL) out = stdout; + if (out==NULL) out = mi_out_get_default(); vsnprintf(buf,sizeof(buf)-1,fmt,args); - #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output - // after the main thread closes so use direct console output. - if (out==stderr) { - if (prefix != NULL) _cputs(prefix); - _cputs(buf); - } - else - #endif - { - if (prefix != NULL) fputs(prefix,out); - fputs(buf,out); - } + if (prefix != NULL) out(prefix); + out(buf); recurse = false; return; } -void _mi_fprintf( FILE* out, const char* fmt, ... ) { +void _mi_fprintf( mi_output_fun* out, const char* fmt, ... ) { va_list args; va_start(args,fmt); mi_vfprintf(out,NULL,fmt,args); @@ -181,7 +196,7 @@ void _mi_trace_message(const char* fmt, ...) { if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher va_list args; va_start(args, fmt); - mi_vfprintf(stderr, "mimalloc: ", fmt, args); + mi_vfprintf(NULL, "mimalloc: ", fmt, args); va_end(args); } @@ -189,7 +204,7 @@ void _mi_verbose_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_verbose)) return; va_list args; va_start(args,fmt); - mi_vfprintf(stderr, "mimalloc: ", fmt, args); + mi_vfprintf(NULL, "mimalloc: ", fmt, args); va_end(args); } @@ -198,7 +213,7 @@ void _mi_error_message(const char* fmt, ...) { if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; va_list args; va_start(args,fmt); - mi_vfprintf(stderr, "mimalloc: error: ", fmt, args); + mi_vfprintf(NULL, "mimalloc: error: ", fmt, args); va_end(args); mi_assert(false); } @@ -208,14 +223,14 @@ void _mi_warning_message(const char* fmt, ...) { if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; va_list args; va_start(args,fmt); - mi_vfprintf(stderr, "mimalloc: warning: ", fmt, args); + mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args); va_end(args); } #if MI_DEBUG void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { - _mi_fprintf(stderr,"mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); + _mi_fprintf(NULL,"mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); abort(); } #endif diff --git a/src/stats.c b/src/stats.c index 1ecc8b3a..37a7bde4 100644 --- a/src/stats.c +++ b/src/stats.c @@ -8,6 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-internal.h" #include "mimalloc-atomic.h" +#include // fputs, stderr #include // memset @@ -120,7 +121,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { Display statistics ----------------------------------------------------------- */ -static void mi_printf_amount(int64_t n, int64_t unit, FILE* out, const char* fmt) { +static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const char* fmt) { char buf[32]; int len = 32; const char* suffix = (unit <= 0 ? 
" " : "b"); @@ -141,16 +142,16 @@ static void mi_printf_amount(int64_t n, int64_t unit, FILE* out, const char* fmt } -static void mi_print_amount(int64_t n, int64_t unit, FILE* out) { +static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out) { mi_printf_amount(n,unit,out,NULL); } -static void mi_print_count(int64_t n, int64_t unit, FILE* out) { +static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out) { if (unit==1) _mi_fprintf(out,"%11s"," "); else mi_print_amount(n,0,out); } -static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, FILE* out ) { +static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out ) { _mi_fprintf(out,"%10s:", msg); if (unit>0) { mi_print_amount(stat->peak, unit, out); @@ -179,24 +180,24 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t } } -static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, FILE* out ) { +static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out ) { _mi_fprintf(out, "%10s:", msg); mi_print_amount(stat->total, -1, out); _mi_fprintf(out, "\n"); } -static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, FILE* out) { +static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out) { double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count); _mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg); } -static void mi_print_header( FILE* out ) { +static void mi_print_header(mi_output_fun* out ) { _mi_fprintf(out,"%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count "); } #if MI_STAT>1 -static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, FILE* out) { +static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out) { bool found = false; char buf[64]; for (size_t i = 0; i <= max; i++) { @@ -220,8 +221,7 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); -static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_noexcept { - if (out == NULL) out = stderr; +static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_attr_noexcept { mi_print_header(out); #if MI_STAT>1 mi_stat_count_t normal = { 0,0,0,0 }; @@ -304,16 +304,16 @@ void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` } -static void mi_stats_print_ex(mi_stats_t* stats, double secs, FILE* out) { +static void mi_stats_print_ex(mi_stats_t* stats, double secs, mi_output_fun* out) { mi_stats_merge_from(stats); _mi_stats_print(&_mi_stats_main, secs, out); } -void mi_stats_print(FILE* out) mi_attr_noexcept { +void mi_stats_print(mi_output_fun* out) mi_attr_noexcept { mi_stats_print_ex(mi_stats_get_default(),_mi_clock_end(mi_time_start),out); } -void mi_thread_stats_print(FILE* out) mi_attr_noexcept { +void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept { _mi_stats_print(mi_stats_get_default(), _mi_clock_end(mi_time_start), out); }
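The net effect of the statistics changes is that a program controls when per-thread counters are folded into the main totals, instead of waiting for thread exit. A closing usage sketch:

    #include <mimalloc.h>

    static void do_work(void) {
      void* p = mi_malloc(1024);
      mi_free(p);
      // fold this thread's statistics into the main stats now,
      // rather than implicitly at mi_thread_done()
      mi_stats_merge();
    }

    int main(void) {
      do_work();
      mi_stats_print(NULL);   // print the merged totals via the default/registered output
      return 0;
    }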