# HG changeset patch # User Bram Moolenaar # Date 1679003104 -3600 # Node ID 98774a275d6dd6194152e8893f47859bc21274c0 # Parent 49fdfec64a4c24e54e1f8e8ce7edd4aaabb77158 patch 9.0.1411: accuracy of profiling is not optimal Commit: https://github.com/vim/vim/commit/076de79ad832558267b3ff903c048df2f4c1a5d6 Author: Ernie Rael Date: Thu Mar 16 21:43:15 2023 +0000 patch 9.0.1411: accuracy of profiling is not optimal Problem: Accuracy of profiling is not optimal. Solution: Use CLOCK_MONOTONIC if possible. (Ernie Rael, closes https://github.com/vim/vim/issues/12129) diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -7150,7 +7150,8 @@ reltimestr({time}) *reltimestr()* call MyFunction() echo reltimestr(reltime(start)) < Note that overhead for the commands will be added to the time. - The accuracy depends on the system. + The accuracy depends on the system. Use reltimefloat() for the + greatest accuracy which is nanoseconds on some systems. Leading spaces are used to make the string align nicely. You can use split() to remove it. > echo split(reltimestr(reltime(start)))[0] @@ -10753,6 +10754,7 @@ persistent_undo Compiled with support f postscript Compiled with PostScript file printing. printer Compiled with |:hardcopy| support. profile Compiled with |:profile| support. +prof_nsec Profile results are in nano seconds. python Python 2.x interface available. |has-python| python_compiled Compiled with Python 2.x interface. |has-python| python_dynamic Python 2.x interface is dynamically loaded. |has-python| diff --git a/runtime/doc/repeat.txt b/runtime/doc/repeat.txt --- a/runtime/doc/repeat.txt +++ b/runtime/doc/repeat.txt @@ -1148,9 +1148,10 @@ long you take to respond to the input() Profiling should give a good indication of where time is spent, but keep in mind there are various things that may clobber the results: -- The accuracy of the time measured depends on the gettimeofday() system - function. It may only be as accurate as 1/100 second, even though the times - are displayed in micro seconds. +- The accuracy of the time measured depends on the gettimeofday(), or + clock_gettime if available, system function. The accuracy ranges from 1/100 + second to nano seconds. With clock_gettime the times are displayed in nano + seconds, otherwise micro seconds. You can use `has("prof_nsec")`. - Real elapsed time is measured, if other processes are busy they may cause delays at unpredictable moments. You may want to run the profiling several diff --git a/src/evalfunc.c b/src/evalfunc.c --- a/src/evalfunc.c +++ b/src/evalfunc.c @@ -6145,6 +6145,13 @@ f_has(typval_T *argvars, typval_T *rettv 0 #endif }, + {"prof_nsec", +#ifdef HAVE_TIMER_CREATE + 1 +#else + 0 +#endif + }, {"reltime", #ifdef FEAT_RELTIME 1 @@ -8226,7 +8233,7 @@ init_srand(UINT32_T *x) # if defined(MSWIN) *x = (UINT32_T)res.LowPart; # else - *x = (UINT32_T)res.tv_usec; + *x = (UINT32_T)res.tv_fsec; # endif #else *x = vim_time(); diff --git a/src/profiler.c b/src/profiler.c --- a/src/profiler.c +++ b/src/profiler.c @@ -24,7 +24,7 @@ profile_start(proftime_T *tm) # ifdef MSWIN QueryPerformanceCounter(tm); # else - gettimeofday(tm, NULL); + PROF_GET_TIME(tm); # endif } @@ -40,12 +40,12 @@ profile_end(proftime_T *tm) QueryPerformanceCounter(&now); tm->QuadPart = now.QuadPart - tm->QuadPart; # else - gettimeofday(&now, NULL); - tm->tv_usec = now.tv_usec - tm->tv_usec; + PROF_GET_TIME(&now); + tm->tv_fsec = now.tv_fsec - tm->tv_fsec; tm->tv_sec = now.tv_sec - tm->tv_sec; - if (tm->tv_usec < 0) + if (tm->tv_fsec < 0) { - tm->tv_usec += 1000000; + tm->tv_fsec += TV_FSEC_SEC; --tm->tv_sec; } # endif @@ -60,11 +60,11 @@ profile_sub(proftime_T *tm, proftime_T * # ifdef MSWIN tm->QuadPart -= tm2->QuadPart; # else - tm->tv_usec -= tm2->tv_usec; + tm->tv_fsec -= tm2->tv_fsec; tm->tv_sec -= tm2->tv_sec; - if (tm->tv_usec < 0) + if (tm->tv_fsec < 0) { - tm->tv_usec += 1000000; + tm->tv_fsec += TV_FSEC_SEC; --tm->tv_sec; } # endif @@ -85,7 +85,7 @@ profile_msg(proftime_T *tm) QueryPerformanceFrequency(&fr); sprintf(buf, "%10.6lf", (double)tm->QuadPart / (double)fr.QuadPart); # else - sprintf(buf, "%3ld.%06ld", (long)tm->tv_sec, (long)tm->tv_usec); + sprintf(buf, PROF_TIME_FORMAT, (long)tm->tv_sec, (long)tm->tv_fsec); # endif return buf; } @@ -102,7 +102,7 @@ profile_float(proftime_T *tm) QueryPerformanceFrequency(&fr); return (float_T)tm->QuadPart / (float_T)fr.QuadPart; # else - return (float_T)tm->tv_sec + (float_T)tm->tv_usec / 1000000.0; + return (float_T)tm->tv_sec + (float_T)tm->tv_fsec / (float_T)TV_FSEC_SEC; # endif } @@ -123,12 +123,12 @@ profile_setlimit(long msec, proftime_T * QueryPerformanceFrequency(&fr); tm->QuadPart += (LONGLONG)((double)msec / 1000.0 * (double)fr.QuadPart); # else - long usec; + long fsec; - gettimeofday(tm, NULL); - usec = (long)tm->tv_usec + (long)msec * 1000; - tm->tv_usec = usec % 1000000L; - tm->tv_sec += usec / 1000000L; + PROF_GET_TIME(tm); + fsec = (long)tm->tv_fsec + (long)msec * (TV_FSEC_SEC / 1000); + tm->tv_fsec = fsec % (long)TV_FSEC_SEC; + tm->tv_sec += fsec / (long)TV_FSEC_SEC; # endif } } @@ -149,9 +149,9 @@ profile_passed_limit(proftime_T *tm) # else if (tm->tv_sec == 0) // timer was not set return FALSE; - gettimeofday(&now, NULL); + PROF_GET_TIME(&now); return (now.tv_sec > tm->tv_sec - || (now.tv_sec == tm->tv_sec && now.tv_usec > tm->tv_usec)); + || (now.tv_sec == tm->tv_sec && now.tv_fsec > tm->tv_fsec)); # endif } @@ -164,7 +164,7 @@ profile_zero(proftime_T *tm) # ifdef MSWIN tm->QuadPart = 0; # else - tm->tv_usec = 0; + tm->tv_fsec = 0; tm->tv_sec = 0; # endif } @@ -189,10 +189,11 @@ profile_divide(proftime_T *tm, int count # ifdef MSWIN tm2->QuadPart = tm->QuadPart / count; # else - double usec = (tm->tv_sec * 1000000.0 + tm->tv_usec) / count; + double fsec = (tm->tv_sec * (float_T)TV_FSEC_SEC + tm->tv_fsec) + / count; - tm2->tv_sec = floor(usec / 1000000.0); - tm2->tv_usec = vim_round(usec - (tm2->tv_sec * 1000000.0)); + tm2->tv_sec = floor(fsec / (float_T)TV_FSEC_SEC); + tm2->tv_fsec = vim_round(fsec - (tm2->tv_sec * (float_T)TV_FSEC_SEC)); # endif } } @@ -213,11 +214,11 @@ profile_add(proftime_T *tm, proftime_T * # ifdef MSWIN tm->QuadPart += tm2->QuadPart; # else - tm->tv_usec += tm2->tv_usec; + tm->tv_fsec += tm2->tv_fsec; tm->tv_sec += tm2->tv_sec; - if (tm->tv_usec >= 1000000) + if (tm->tv_fsec >= TV_FSEC_SEC) { - tm->tv_usec -= 1000000; + tm->tv_fsec -= TV_FSEC_SEC; ++tm->tv_sec; } # endif @@ -237,7 +238,7 @@ profile_self(proftime_T *self, proftime_ #else if (total->tv_sec < children->tv_sec || (total->tv_sec == children->tv_sec - && total->tv_usec <= children->tv_usec)) + && total->tv_fsec <= children->tv_fsec)) return; #endif profile_add(self, total); @@ -274,7 +275,7 @@ profile_equal(proftime_T *tm1, proftime_ # ifdef MSWIN return (tm1->QuadPart == tm2->QuadPart); # else - return (tm1->tv_usec == tm2->tv_usec && tm1->tv_sec == tm2->tv_sec); + return (tm1->tv_fsec == tm2->tv_fsec && tm1->tv_sec == tm2->tv_sec); # endif } @@ -288,7 +289,7 @@ profile_cmp(const proftime_T *tm1, const return (int)(tm2->QuadPart - tm1->QuadPart); # else if (tm1->tv_sec == tm2->tv_sec) - return tm2->tv_usec - tm1->tv_usec; + return tm2->tv_fsec - tm1->tv_fsec; return tm2->tv_sec - tm1->tv_sec; # endif } @@ -551,16 +552,16 @@ prof_func_line( { fprintf(fd, "%5d ", count); if (prefer_self && profile_equal(total, self)) - fprintf(fd, " "); + fprintf(fd, PROF_TIME_BLANK); else fprintf(fd, "%s ", profile_msg(total)); if (!prefer_self && profile_equal(total, self)) - fprintf(fd, " "); + fprintf(fd, PROF_TIME_BLANK); else fprintf(fd, "%s ", profile_msg(self)); } else - fprintf(fd, " "); + fprintf(fd, " %s%s", PROF_TIME_BLANK, PROF_TIME_BLANK); } static void @@ -575,7 +576,7 @@ prof_sort_list( ufunc_T *fp; fprintf(fd, "FUNCTIONS SORTED ON %s TIME\n", title); - fprintf(fd, "count total (s) self (s) function\n"); + fprintf(fd, "%s function\n", PROF_TOTALS_HEADER); for (i = 0; i < 20 && i < st_len; ++i) { fp = sorttab[i]; @@ -858,7 +859,7 @@ func_dump_profile(FILE *fd) fprintf(fd, "Total time: %s\n", profile_msg(&fp->uf_tm_total)); fprintf(fd, " Self time: %s\n", profile_msg(&fp->uf_tm_self)); fprintf(fd, "\n"); - fprintf(fd, "count total (s) self (s)\n"); + fprintf(fd, "%s\n", PROF_TOTALS_HEADER); for (i = 0; i < fp->uf_lines.ga_len; ++i) { @@ -948,7 +949,7 @@ script_dump_profile(FILE *fd) fprintf(fd, "Total time: %s\n", profile_msg(&si->sn_pr_total)); fprintf(fd, " Self time: %s\n", profile_msg(&si->sn_pr_self)); fprintf(fd, "\n"); - fprintf(fd, "count total (s) self (s)\n"); + fprintf(fd, "%s\n", PROF_TOTALS_HEADER); sfd = mch_fopen((char *)si->sn_name, "r"); if (sfd == NULL) diff --git a/src/testdir/test_profile.vim b/src/testdir/test_profile.vim --- a/src/testdir/test_profile.vim +++ b/src/testdir/test_profile.vim @@ -6,8 +6,13 @@ CheckFeature profile source shared.vim source screendump.vim -let s:header = 'count total (s) self (s)' -let s:header_func = 'count total (s) self (s) function' +if has('prof_nsec') + let s:header = 'count total (s) self (s)' + let s:header_func = 'count total (s) self (s) function' +else + let s:header = 'count total (s) self (s)' + let s:header_func = 'count total (s) self (s) function' +endif func Test_profile_func() call RunProfileFunc('func', 'let', 'let') diff --git a/src/testdir/test_sleep.vim b/src/testdir/test_sleep.vim --- a/src/testdir/test_sleep.vim +++ b/src/testdir/test_sleep.vim @@ -1,11 +1,7 @@ " Test for sleep and sleep! commands func! s:get_time_ms() - let timestr = reltimestr(reltime()) - let dotidx = stridx(timestr, '.') - let sec = str2nr(timestr[:dotidx]) - let msec = str2nr(timestr[dotidx + 1:]) - return (sec * 1000) + (msec / 1000) + return float2nr(reltimefloat(reltime()) * 1000) endfunc func! s:assert_takes_longer(cmd, time_ms) diff --git a/src/time.c b/src/time.c --- a/src/time.c +++ b/src/time.c @@ -163,7 +163,7 @@ list2proftime(typval_T *arg, proftime_T tm->LowPart = n2; # else tm->tv_sec = n1; - tm->tv_usec = n2; + tm->tv_fsec = n2; # endif return error ? FAIL : OK; } @@ -222,7 +222,7 @@ f_reltime(typval_T *argvars UNUSED, typv n2 = res.LowPart; # else n1 = res.tv_sec; - n2 = res.tv_usec; + n2 = res.tv_fsec; # endif list_append_number(rettv->vval.v_list, (varnumber_T)n1); list_append_number(rettv->vval.v_list, (varnumber_T)n2); @@ -258,6 +258,7 @@ f_reltimefloat(typval_T *argvars UNUSED, void f_reltimestr(typval_T *argvars UNUSED, typval_T *rettv) { + static char buf[50]; # ifdef FEAT_RELTIME proftime_T tm; # endif @@ -269,7 +270,15 @@ f_reltimestr(typval_T *argvars UNUSED, t return; if (list2proftime(&argvars[0], &tm) == OK) + { +# ifdef MSWIN rettv->vval.v_string = vim_strsave((char_u *)profile_msg(&tm)); +# else + long usec = tm.tv_fsec / (TV_FSEC_SEC / 1000000); + sprintf(buf, "%3ld.%06ld", (long)tm.tv_sec, usec); + rettv->vval.v_string = vim_strsave((char_u *)buf); +# endif + } else if (in_vim9script()) emsg(_(e_invalid_argument)); # endif @@ -392,7 +401,7 @@ static timer_T *first_timer = NULL; static long last_timer_id = 0; /* - * Return time left until "due". Negative if past "due". + * Return time left, in "msec", until "due". Negative if past "due". */ long proftime_time_left(proftime_T *due, proftime_T *now) @@ -409,7 +418,7 @@ proftime_time_left(proftime_T *due, prof if (now->tv_sec > due->tv_sec) return 0; return (due->tv_sec - now->tv_sec) * 1000 - + (due->tv_usec - now->tv_usec) / 1000; + + (due->tv_fsec - now->tv_fsec) / (TV_FSEC_SEC / 1000); # endif } diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -696,6 +696,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 1411, +/**/ 1410, /**/ 1409, diff --git a/src/vim.h b/src/vim.h --- a/src/vim.h +++ b/src/vim.h @@ -1869,8 +1869,27 @@ typedef void *vim_acl_T; // dummy t #if (defined(FEAT_PROFILE) || defined(FEAT_RELTIME)) && !defined(PROTO) # ifdef MSWIN typedef LARGE_INTEGER proftime_T; +# define PROF_TIME_BLANK " " +# define PROF_TOTALS_HEADER "count total (s) self (s)" # else + // Use tv_fsec for fraction of second (micro or nano) of proftime_T +# if defined(HAVE_TIMER_CREATE) +typedef struct timespec proftime_T; +# define PROF_GET_TIME(tm) clock_gettime(CLOCK_MONOTONIC, tm) +# define tv_fsec tv_nsec +# define TV_FSEC_SEC 1000000000L +# define PROF_TIME_FORMAT "%3ld.%09ld" +# define PROF_TIME_BLANK " " +# define PROF_TOTALS_HEADER "count total (s) self (s)" +# else typedef struct timeval proftime_T; +# define PROF_GET_TIME(tm) gettimeofday(tm, NULL) +# define tv_fsec tv_usec +# define TV_FSEC_SEC 1000000 +# define PROF_TIME_FORMAT "%3ld.%06ld" +# define PROF_TIME_BLANK " " +# define PROF_TOTALS_HEADER "count total (s) self (s)" +# endif # endif #else typedef int proftime_T; // dummy for function prototypes