From 8afd06b248f6a82763292821bf5096e35f6a5a0b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 4 Nov 2019 08:44:40 -0800 Subject: [PATCH 1/3] use int64 for time (instead of double) --- include/mimalloc-internal.h | 7 ++- src/arena.c | 4 +- src/memory.c | 1 + src/os.c | 22 ++++++--- src/stats.c | 95 ++++++++++++++++++------------------- 5 files changed, 70 insertions(+), 59 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index c28cf0fd..413f76e6 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -106,8 +106,11 @@ uintptr_t _mi_heap_random(mi_heap_t* heap); // "stats.c" void _mi_stats_done(mi_stats_t* stats); -double _mi_clock_end(double start); -double _mi_clock_start(void); + +typedef int64_t mi_msecs_t; +mi_msecs_t _mi_clock_now(void); +mi_msecs_t _mi_clock_end(mi_msecs_t start); +mi_msecs_t _mi_clock_start(void); // "alloc.c" void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic` diff --git a/src/arena.c b/src/arena.c index 08a36415..6faf7d3e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -28,7 +28,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar //int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize); +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); int _mi_os_numa_node_count(void); @@ -390,7 +390,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, (double)pages / 2.0, &pages_reserved, &hsize); + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, pages*500, &pages_reserved, &hsize); if (p==NULL || pages_reserved==0) { _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; diff --git a/src/memory.c b/src/memory.c index a425393c..75a1df92 100644 --- a/src/memory.c +++ b/src/memory.c @@ -564,6 +564,7 @@ void _mi_mem_collect(mi_stats_t* stats) { } } + /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ diff --git a/src/os.c b/src/os.c index 3f299362..44ef9830 100644 --- a/src/os.c +++ b/src/os.c @@ -871,6 +871,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } #endif +#if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge OS pages static _Atomic(uintptr_t) mi_huge_start; // = 0 @@ -899,18 +900,25 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { if (total_size != NULL) *total_size = size; return (uint8_t*)start; } +#else +static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { + if (total_size != NULL) *total_size = 0; + return NULL; +} +#endif // Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize) { +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) { if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; uint8_t* start = mi_os_claim_huge_pages(pages, &size); + if (start == NULL) return NULL; // or 32-bit systems // Allocate one page at the time but try to place them contiguously // We allocate one page at the time to be able to abort if it takes too long // or to at least allocate as many as available on the system. - double start_t = _mi_clock_start(); + mi_msecs_t start_t = _mi_clock_start(); size_t page; for (page = 0; page < pages; page++) { // allocate a page @@ -932,14 +940,14 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, s _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); // check for timeout - double elapsed = _mi_clock_end(start_t); + mi_msecs_t elapsed = _mi_clock_end(start_t); if (page >= 1) { - double estimate = ((elapsed / (double)(page+1)) * (double)pages); - if (estimate > 1.5*max_secs) { // seems like we are going to timeout, break - elapsed = max_secs + 1.0; + mi_msecs_t estimate = ((elapsed / (page+1)) * pages); + if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + elapsed = max_msecs + 1; } } - if (elapsed > max_secs) { + if (elapsed > max_msecs) { _mi_warning_message("huge page allocation timed out\n"); break; } diff --git a/src/stats.c b/src/stats.c index 79362cc4..a1248043 100644 --- a/src/stats.c +++ b/src/stats.c @@ -231,9 +231,9 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin #endif -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); -static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_attr_noexcept { +static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out) mi_attr_noexcept { mi_print_header(out); #if MI_STAT>1 mi_stat_count_t normal = { 0,0,0,0 }; @@ -266,16 +266,16 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_stat_print(&stats->threads, "threads", -1, out); mi_stat_counter_print_avg(&stats->searches, "searches", out); _mi_fprintf(out, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); - if (secs >= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs); + if (elapsed > 0) _mi_fprintf(out, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); - double user_time; - double sys_time; + mi_msecs_t user_time; + mi_msecs_t sys_time; size_t peak_rss; size_t page_faults; size_t page_reclaim; size_t peak_commit; mi_process_info(&user_time, &sys_time, &peak_rss, &page_faults, &page_reclaim, &peak_commit); - _mi_fprintf(out,"%10s: user: %.3f s, system: %.3f s, faults: %lu, reclaims: %lu, rss: ", "process", user_time, sys_time, (unsigned long)page_faults, (unsigned long)page_reclaim ); + _mi_fprintf(out,"%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, reclaims: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults, (unsigned long)page_reclaim ); mi_printf_amount((int64_t)peak_rss, 1, out, "%s"); if (peak_commit > 0) { _mi_fprintf(out,", commit charge: "); @@ -284,9 +284,7 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) _mi_fprintf(out,"\n"); } -double _mi_clock_end(double start); -double _mi_clock_start(void); -static double mi_time_start = 0.0; +static mi_msecs_t mi_time_start; // = 0 static mi_stats_t* mi_stats_get_default(void) { mi_heap_t* heap = mi_heap_get_default(); @@ -316,71 +314,72 @@ void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` } -static void mi_stats_print_ex(mi_stats_t* stats, double secs, mi_output_fun* out) { +static void mi_stats_print_ex(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out) { mi_stats_merge_from(stats); - _mi_stats_print(&_mi_stats_main, secs, out); + _mi_stats_print(&_mi_stats_main, elapsed, out); } void mi_stats_print(mi_output_fun* out) mi_attr_noexcept { - mi_stats_print_ex(mi_stats_get_default(),_mi_clock_end(mi_time_start),out); + mi_msecs_t elapsed = _mi_clock_end(mi_time_start); + mi_stats_print_ex(mi_stats_get_default(),elapsed,out); } void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept { - _mi_stats_print(mi_stats_get_default(), _mi_clock_end(mi_time_start), out); + mi_msecs_t elapsed = _mi_clock_end(mi_time_start); + _mi_stats_print(mi_stats_get_default(), elapsed, out); } - -// -------------------------------------------------------- -// Basic timer for convenience -// -------------------------------------------------------- - +// ---------------------------------------------------------------- +// Basic timer for convenience; use milli-seconds to avoid doubles +// ---------------------------------------------------------------- #ifdef _WIN32 #include -static double mi_to_seconds(LARGE_INTEGER t) { - static double freq = 0.0; - if (freq <= 0.0) { +static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { + static LARGE_INTEGER mfreq; // = 0 + if (mfreq.QuadPart == 0LL) { LARGE_INTEGER f; QueryPerformanceFrequency(&f); - freq = (double)(f.QuadPart); + mfreq.QuadPart = f.QuadPart/1000LL; + if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; } - return ((double)(t.QuadPart) / freq); + return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); } -static double mi_clock_now(void) { +mi_msecs_t _mi_clock_now(void) { LARGE_INTEGER t; QueryPerformanceCounter(&t); - return mi_to_seconds(t); + return mi_to_msecs(t); } #else #include #ifdef CLOCK_REALTIME -static double mi_clock_now(void) { +mi_msecs_t _mi_clock_now(void) { struct timespec t; clock_gettime(CLOCK_REALTIME, &t); - return (double)t.tv_sec + (1.0e-9 * (double)t.tv_nsec); + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); } #else // low resolution timer -static double mi_clock_now(void) { - return ((double)clock() / (double)CLOCKS_PER_SEC); +mi_msecs_t _mi_clock_now(void) { + return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000)); } #endif #endif -static double mi_clock_diff = 0.0; +static mi_msecs_t mi_clock_diff; -double _mi_clock_start(void) { +mi_msecs_t _mi_clock_start(void) { if (mi_clock_diff == 0.0) { - double t0 = mi_clock_now(); - mi_clock_diff = mi_clock_now() - t0; + mi_msecs_t t0 = _mi_clock_now(); + mi_clock_diff = _mi_clock_now() - t0; } - return mi_clock_now(); + return _mi_clock_now(); } -double _mi_clock_end(double start) { - double end = mi_clock_now(); +mi_msecs_t _mi_clock_end(mi_msecs_t start) { + mi_msecs_t end = _mi_clock_now(); return (end - start - mi_clock_diff); } @@ -394,21 +393,21 @@ double _mi_clock_end(double start) { #include #pragma comment(lib,"psapi.lib") -static double filetime_secs(const FILETIME* ftime) { +static mi_msecs_t filetime_msecs(const FILETIME* ftime) { ULARGE_INTEGER i; i.LowPart = ftime->dwLowDateTime; i.HighPart = ftime->dwHighDateTime; - double secs = (double)(i.QuadPart) * 1.0e-7; // FILETIME is in 100 nano seconds - return secs; + mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds + return msecs; } -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { FILETIME ct; FILETIME ut; FILETIME st; FILETIME et; GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); - *utime = filetime_secs(&ut); - *stime = filetime_secs(&st); + *utime = filetime_msecs(&ut); + *stime = filetime_msecs(&st); PROCESS_MEMORY_COUNTERS info; GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); @@ -427,11 +426,11 @@ static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size #include #endif -static double timeval_secs(const struct timeval* tv) { - return (double)tv->tv_sec + ((double)tv->tv_usec * 1.0e-6); +static mi_msecs_t timeval_secs(const struct timeval* tv) { + return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); } -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { struct rusage rusage; getrusage(RUSAGE_SELF, &rusage); #if defined(__APPLE__) && defined(__MACH__) @@ -452,12 +451,12 @@ static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size #pragma message("define a way to get process info") #endif -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { *peak_rss = 0; *page_faults = 0; *page_reclaim = 0; *peak_commit = 0; - *utime = 0.0; - *stime = 0.0; + *utime = 0; + *stime = 0; } #endif From aece753dce816e6c6967449eef5de5fd30d4a294 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 12:18:20 -0800 Subject: [PATCH 2/3] fix output during preloading enabling stderr only after the crt has loaded --- src/options.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/src/options.c b/src/options.c index a49c46ed..0b3c6c97 100644 --- a/src/options.c +++ b/src/options.c @@ -14,6 +14,8 @@ terms of the MIT license. A copy of the license can be found in the file #include // toupper #include +static void mi_add_stderr_output(); + int mi_version(void) mi_attr_noexcept { return MI_MALLOC_VERSION; } @@ -72,7 +74,9 @@ static mi_option_desc_t options[_mi_option_last] = static void mi_option_init(mi_option_desc_t* desc); void _mi_options_init(void) { - // called on process load + // called on process load; should not be called before the CRT is initialized! + // (e.g. do not call this from process_init as that may run before CRT initialization) + mi_add_stderr_output(); // now it safe to use stderr for output for(int i = 0; i < _mi_option_last; i++ ) { mi_option_t option = (mi_option_t)i; mi_option_get(option); // initialize @@ -134,7 +138,7 @@ static void mi_out_stderr(const char* msg) { #ifdef _WIN32 // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. - _cputs(msg); + if (!_mi_preloading()) { _cputs(msg); } #else fputs(msg, stderr); #endif @@ -165,23 +169,29 @@ static void mi_out_buf(const char* msg) { memcpy(&out_buf[start], msg, n); } -static void mi_out_buf_flush(mi_output_fun* out) { +static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { if (out==NULL) return; - // claim all (no more output will be added after this point) - size_t count = mi_atomic_addu(&out_len, MI_MAX_DELAY_OUTPUT); + // claim (if `no_more_buf == true`, no more output will be added after this point) + size_t count = mi_atomic_addu(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); // and output the current contents if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; out(out_buf); + if (!no_more_buf) { + out_buf[count] = '\n'; // if continue with the buffer, insert a newline + } } -// The initial default output, outputs to stderr and the delayed output buffer. + +// Once this module is loaded, switch to this routine +// which outputs to stderr and the delayed output buffer. static void mi_out_buf_stderr(const char* msg) { mi_out_stderr(msg); mi_out_buf(msg); } + // -------------------------------------------------------- // Default output handler // -------------------------------------------------------- @@ -193,14 +203,19 @@ static mi_output_fun* volatile mi_out_default; // = NULL static mi_output_fun* mi_out_get_default(void) { mi_output_fun* out = mi_out_default; - return (out == NULL ? &mi_out_buf_stderr : out); + return (out == NULL ? &mi_out_buf : out); } void mi_register_output(mi_output_fun* out) mi_attr_noexcept { mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer - if (out!=NULL) mi_out_buf_flush(out); // output the delayed output now + if (out!=NULL) mi_out_buf_flush(out,true); // output all the delayed output now } +// add stderr to the delayed output after the module is loaded +static void mi_add_stderr_output() { + mi_out_buf_flush(&mi_out_stderr, false); // flush current contents to stderr + mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output +} // -------------------------------------------------------- // Messages, all end up calling `_mi_fputs`. @@ -213,7 +228,7 @@ static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT static mi_decl_thread bool recurse = false; void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { - if (_mi_preloading() || recurse) return; + if (recurse) return; if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(); recurse = true; if (prefix != NULL) out(prefix); @@ -227,7 +242,7 @@ void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { static void mi_vfprintf( mi_output_fun* out, const char* prefix, const char* fmt, va_list args ) { char buf[512]; if (fmt==NULL) return; - if (_mi_preloading() || recurse) return; + if (recurse) return; recurse = true; vsnprintf(buf,sizeof(buf)-1,fmt,args); recurse = false; From c38af8f7c14f33d22832bb24fdacfd41b20ad69f Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:25:41 -0800 Subject: [PATCH 3/3] merge d1d65fbc: make max error messages configurable --- include/mimalloc.h | 1 + src/options.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index b63ed79d..bc817f54 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -271,6 +271,7 @@ typedef enum mi_option_e { mi_option_eager_commit_delay, mi_option_segment_reset, mi_option_os_tag, + mi_option_max_errors, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 0b3c6c97..0bee74e0 100644 --- a/src/options.c +++ b/src/options.c @@ -14,6 +14,8 @@ terms of the MIT license. A copy of the license can be found in the file #include // toupper #include +static uintptr_t mi_max_error_count = 16; // stop outputting errors after this + static void mi_add_stderr_output(); int mi_version(void) mi_attr_noexcept { @@ -68,7 +70,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose + { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; static void mi_option_init(mi_option_desc_t* desc); @@ -85,6 +88,7 @@ void _mi_options_init(void) { _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } } + mi_max_error_count = mi_option_get(mi_option_max_errors); } long mi_option_get(mi_option_t option) { @@ -275,7 +279,7 @@ void _mi_verbose_message(const char* fmt, ...) { void _mi_error_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; + if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, "mimalloc: error: ", fmt, args); @@ -285,7 +289,7 @@ void _mi_error_message(const char* fmt, ...) { void _mi_warning_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; + if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args);