changeset 32160:98774a275d6d v9.0.1411

patch 9.0.1411: accuracy of profiling is not optimal Commit: https://github.com/vim/vim/commit/076de79ad832558267b3ff903c048df2f4c1a5d6 Author: Ernie Rael <errael@raelity.com> Date: Thu Mar 16 21:43:15 2023 +0000 patch 9.0.1411: accuracy of profiling is not optimal Problem: Accuracy of profiling is not optimal. Solution: Use CLOCK_MONOTONIC if possible. (Ernie Rael, closes https://github.com/vim/vim/issues/12129)
author Bram Moolenaar <Bram@vim.org>
date Thu, 16 Mar 2023 22:45:04 +0100
parents 49fdfec64a4c
children c234bd6a1cf7
files runtime/doc/builtin.txt runtime/doc/repeat.txt src/evalfunc.c src/profiler.c src/testdir/test_profile.vim src/testdir/test_sleep.vim src/time.c src/version.c src/vim.h
diffstat 9 files changed, 91 insertions(+), 49 deletions(-) [+]
line wrap: on
line diff
--- a/runtime/doc/builtin.txt
+++ b/runtime/doc/builtin.txt
@@ -7150,7 +7150,8 @@ reltimestr({time})				*reltimestr()*
 			call MyFunction()
 			echo reltimestr(reltime(start))
 <		Note that overhead for the commands will be added to the time.
-		The accuracy depends on the system.
+		The accuracy depends on the system. Use reltimefloat() for the
+		greatest accuracy which is nanoseconds on some systems.
 		Leading spaces are used to make the string align nicely.  You
 		can use split() to remove it. >
 			echo split(reltimestr(reltime(start)))[0]
@@ -10753,6 +10754,7 @@ persistent_undo		Compiled with support f
 postscript		Compiled with PostScript file printing.
 printer			Compiled with |:hardcopy| support.
 profile			Compiled with |:profile| support.
+prof_nsec		Profile results are in nano seconds.
 python			Python 2.x interface available. |has-python|
 python_compiled		Compiled with Python 2.x interface. |has-python|
 python_dynamic		Python 2.x interface is dynamically loaded. |has-python|
--- a/runtime/doc/repeat.txt
+++ b/runtime/doc/repeat.txt
@@ -1148,9 +1148,10 @@ long you take to respond to the input() 
 Profiling should give a good indication of where time is spent, but keep in
 mind there are various things that may clobber the results:
 
-- The accuracy of the time measured depends on the gettimeofday() system
-  function.  It may only be as accurate as 1/100 second, even though the times
-  are displayed in micro seconds.
+- The accuracy of the time measured depends on the gettimeofday(), or
+  clock_gettime if available, system function. The accuracy ranges from 1/100
+  second to nano seconds. With clock_gettime the times are displayed in nano
+  seconds, otherwise micro seconds.  You can use `has("prof_nsec")`.
 
 - Real elapsed time is measured, if other processes are busy they may cause
   delays at unpredictable moments.  You may want to run the profiling several
--- a/src/evalfunc.c
+++ b/src/evalfunc.c
@@ -6145,6 +6145,13 @@ f_has(typval_T *argvars, typval_T *rettv
 		0
 #endif
 		},
+	{"prof_nsec",
+#ifdef HAVE_TIMER_CREATE
+		1
+#else
+		0
+#endif
+		},
 	{"reltime",
 #ifdef FEAT_RELTIME
 		1
@@ -8226,7 +8233,7 @@ init_srand(UINT32_T *x)
 #  if defined(MSWIN)
 	    *x = (UINT32_T)res.LowPart;
 #  else
-	    *x = (UINT32_T)res.tv_usec;
+	    *x = (UINT32_T)res.tv_fsec;
 #  endif
 #else
 	    *x = vim_time();
--- a/src/profiler.c
+++ b/src/profiler.c
@@ -24,7 +24,7 @@ profile_start(proftime_T *tm)
 # ifdef MSWIN
     QueryPerformanceCounter(tm);
 # else
-    gettimeofday(tm, NULL);
+    PROF_GET_TIME(tm);
 # endif
 }
 
@@ -40,12 +40,12 @@ profile_end(proftime_T *tm)
     QueryPerformanceCounter(&now);
     tm->QuadPart = now.QuadPart - tm->QuadPart;
 # else
-    gettimeofday(&now, NULL);
-    tm->tv_usec = now.tv_usec - tm->tv_usec;
+    PROF_GET_TIME(&now);
+    tm->tv_fsec = now.tv_fsec - tm->tv_fsec;
     tm->tv_sec = now.tv_sec - tm->tv_sec;
-    if (tm->tv_usec < 0)
+    if (tm->tv_fsec < 0)
     {
-	tm->tv_usec += 1000000;
+	tm->tv_fsec += TV_FSEC_SEC;
 	--tm->tv_sec;
     }
 # endif
@@ -60,11 +60,11 @@ profile_sub(proftime_T *tm, proftime_T *
 # ifdef MSWIN
     tm->QuadPart -= tm2->QuadPart;
 # else
-    tm->tv_usec -= tm2->tv_usec;
+    tm->tv_fsec -= tm2->tv_fsec;
     tm->tv_sec -= tm2->tv_sec;
-    if (tm->tv_usec < 0)
+    if (tm->tv_fsec < 0)
     {
-	tm->tv_usec += 1000000;
+	tm->tv_fsec += TV_FSEC_SEC;
 	--tm->tv_sec;
     }
 # endif
@@ -85,7 +85,7 @@ profile_msg(proftime_T *tm)
     QueryPerformanceFrequency(&fr);
     sprintf(buf, "%10.6lf", (double)tm->QuadPart / (double)fr.QuadPart);
 # else
-    sprintf(buf, "%3ld.%06ld", (long)tm->tv_sec, (long)tm->tv_usec);
+    sprintf(buf, PROF_TIME_FORMAT, (long)tm->tv_sec, (long)tm->tv_fsec);
 # endif
     return buf;
 }
@@ -102,7 +102,7 @@ profile_float(proftime_T *tm)
     QueryPerformanceFrequency(&fr);
     return (float_T)tm->QuadPart / (float_T)fr.QuadPart;
 # else
-    return (float_T)tm->tv_sec + (float_T)tm->tv_usec / 1000000.0;
+    return (float_T)tm->tv_sec + (float_T)tm->tv_fsec / (float_T)TV_FSEC_SEC;
 # endif
 }
 
@@ -123,12 +123,12 @@ profile_setlimit(long msec, proftime_T *
 	QueryPerformanceFrequency(&fr);
 	tm->QuadPart += (LONGLONG)((double)msec / 1000.0 * (double)fr.QuadPart);
 # else
-	long	    usec;
+	long	    fsec;
 
-	gettimeofday(tm, NULL);
-	usec = (long)tm->tv_usec + (long)msec * 1000;
-	tm->tv_usec = usec % 1000000L;
-	tm->tv_sec += usec / 1000000L;
+	PROF_GET_TIME(tm);
+	fsec = (long)tm->tv_fsec + (long)msec * (TV_FSEC_SEC / 1000);
+	tm->tv_fsec = fsec % (long)TV_FSEC_SEC;
+	tm->tv_sec += fsec / (long)TV_FSEC_SEC;
 # endif
     }
 }
@@ -149,9 +149,9 @@ profile_passed_limit(proftime_T *tm)
 # else
     if (tm->tv_sec == 0)    // timer was not set
 	return FALSE;
-    gettimeofday(&now, NULL);
+    PROF_GET_TIME(&now);
     return (now.tv_sec > tm->tv_sec
-	    || (now.tv_sec == tm->tv_sec && now.tv_usec > tm->tv_usec));
+	    || (now.tv_sec == tm->tv_sec && now.tv_fsec > tm->tv_fsec));
 # endif
 }
 
@@ -164,7 +164,7 @@ profile_zero(proftime_T *tm)
 # ifdef MSWIN
     tm->QuadPart = 0;
 # else
-    tm->tv_usec = 0;
+    tm->tv_fsec = 0;
     tm->tv_sec = 0;
 # endif
 }
@@ -189,10 +189,11 @@ profile_divide(proftime_T *tm, int count
 # ifdef MSWIN
 	tm2->QuadPart = tm->QuadPart / count;
 # else
-	double usec = (tm->tv_sec * 1000000.0 + tm->tv_usec) / count;
+	double fsec = (tm->tv_sec * (float_T)TV_FSEC_SEC + tm->tv_fsec)
+								       / count;
 
-	tm2->tv_sec = floor(usec / 1000000.0);
-	tm2->tv_usec = vim_round(usec - (tm2->tv_sec * 1000000.0));
+	tm2->tv_sec = floor(fsec / (float_T)TV_FSEC_SEC);
+	tm2->tv_fsec = vim_round(fsec - (tm2->tv_sec * (float_T)TV_FSEC_SEC));
 # endif
     }
 }
@@ -213,11 +214,11 @@ profile_add(proftime_T *tm, proftime_T *
 # ifdef MSWIN
     tm->QuadPart += tm2->QuadPart;
 # else
-    tm->tv_usec += tm2->tv_usec;
+    tm->tv_fsec += tm2->tv_fsec;
     tm->tv_sec += tm2->tv_sec;
-    if (tm->tv_usec >= 1000000)
+    if (tm->tv_fsec >= TV_FSEC_SEC)
     {
-	tm->tv_usec -= 1000000;
+	tm->tv_fsec -= TV_FSEC_SEC;
 	++tm->tv_sec;
     }
 # endif
@@ -237,7 +238,7 @@ profile_self(proftime_T *self, proftime_
 #else
     if (total->tv_sec < children->tv_sec
 	    || (total->tv_sec == children->tv_sec
-		&& total->tv_usec <= children->tv_usec))
+		&& total->tv_fsec <= children->tv_fsec))
 	return;
 #endif
     profile_add(self, total);
@@ -274,7 +275,7 @@ profile_equal(proftime_T *tm1, proftime_
 # ifdef MSWIN
     return (tm1->QuadPart == tm2->QuadPart);
 # else
-    return (tm1->tv_usec == tm2->tv_usec && tm1->tv_sec == tm2->tv_sec);
+    return (tm1->tv_fsec == tm2->tv_fsec && tm1->tv_sec == tm2->tv_sec);
 # endif
 }
 
@@ -288,7 +289,7 @@ profile_cmp(const proftime_T *tm1, const
     return (int)(tm2->QuadPart - tm1->QuadPart);
 # else
     if (tm1->tv_sec == tm2->tv_sec)
-	return tm2->tv_usec - tm1->tv_usec;
+	return tm2->tv_fsec - tm1->tv_fsec;
     return tm2->tv_sec - tm1->tv_sec;
 # endif
 }
@@ -551,16 +552,16 @@ prof_func_line(
     {
 	fprintf(fd, "%5d ", count);
 	if (prefer_self && profile_equal(total, self))
-	    fprintf(fd, "           ");
+	    fprintf(fd, PROF_TIME_BLANK);
 	else
 	    fprintf(fd, "%s ", profile_msg(total));
 	if (!prefer_self && profile_equal(total, self))
-	    fprintf(fd, "           ");
+	    fprintf(fd, PROF_TIME_BLANK);
 	else
 	    fprintf(fd, "%s ", profile_msg(self));
     }
     else
-	fprintf(fd, "                            ");
+	fprintf(fd, "      %s%s", PROF_TIME_BLANK, PROF_TIME_BLANK);
 }
 
     static void
@@ -575,7 +576,7 @@ prof_sort_list(
     ufunc_T	*fp;
 
     fprintf(fd, "FUNCTIONS SORTED ON %s TIME\n", title);
-    fprintf(fd, "count  total (s)   self (s)  function\n");
+    fprintf(fd, "%s  function\n", PROF_TOTALS_HEADER);
     for (i = 0; i < 20 && i < st_len; ++i)
     {
 	fp = sorttab[i];
@@ -858,7 +859,7 @@ func_dump_profile(FILE *fd)
 		fprintf(fd, "Total time: %s\n", profile_msg(&fp->uf_tm_total));
 		fprintf(fd, " Self time: %s\n", profile_msg(&fp->uf_tm_self));
 		fprintf(fd, "\n");
-		fprintf(fd, "count  total (s)   self (s)\n");
+		fprintf(fd, "%s\n", PROF_TOTALS_HEADER);
 
 		for (i = 0; i < fp->uf_lines.ga_len; ++i)
 		{
@@ -948,7 +949,7 @@ script_dump_profile(FILE *fd)
 	    fprintf(fd, "Total time: %s\n", profile_msg(&si->sn_pr_total));
 	    fprintf(fd, " Self time: %s\n", profile_msg(&si->sn_pr_self));
 	    fprintf(fd, "\n");
-	    fprintf(fd, "count  total (s)   self (s)\n");
+	    fprintf(fd, "%s\n", PROF_TOTALS_HEADER);
 
 	    sfd = mch_fopen((char *)si->sn_name, "r");
 	    if (sfd == NULL)
--- a/src/testdir/test_profile.vim
+++ b/src/testdir/test_profile.vim
@@ -6,8 +6,13 @@ CheckFeature profile
 source shared.vim
 source screendump.vim
 
-let s:header = 'count  total (s)   self (s)'
-let s:header_func = 'count  total (s)   self (s)  function'
+if has('prof_nsec')
+  let s:header = 'count     total (s)      self (s)'
+  let s:header_func = 'count     total (s)      self (s)  function'
+else
+  let s:header = 'count  total (s)   self (s)'
+  let s:header_func = 'count  total (s)   self (s)  function'
+endif
 
 func Test_profile_func()
   call RunProfileFunc('func', 'let', 'let')
--- a/src/testdir/test_sleep.vim
+++ b/src/testdir/test_sleep.vim
@@ -1,11 +1,7 @@
 " Test for sleep and sleep! commands
 
 func! s:get_time_ms()
-  let timestr = reltimestr(reltime())
-  let dotidx = stridx(timestr, '.')
-  let sec = str2nr(timestr[:dotidx])
-  let msec = str2nr(timestr[dotidx + 1:])
-  return (sec * 1000) + (msec / 1000)
+  return float2nr(reltimefloat(reltime()) * 1000)
 endfunc
 
 func! s:assert_takes_longer(cmd, time_ms)
--- a/src/time.c
+++ b/src/time.c
@@ -163,7 +163,7 @@ list2proftime(typval_T *arg, proftime_T 
     tm->LowPart = n2;
 #  else
     tm->tv_sec = n1;
-    tm->tv_usec = n2;
+    tm->tv_fsec = n2;
 #  endif
     return error ? FAIL : OK;
 }
@@ -222,7 +222,7 @@ f_reltime(typval_T *argvars UNUSED, typv
     n2 = res.LowPart;
 #  else
     n1 = res.tv_sec;
-    n2 = res.tv_usec;
+    n2 = res.tv_fsec;
 #  endif
     list_append_number(rettv->vval.v_list, (varnumber_T)n1);
     list_append_number(rettv->vval.v_list, (varnumber_T)n2);
@@ -258,6 +258,7 @@ f_reltimefloat(typval_T *argvars UNUSED,
     void
 f_reltimestr(typval_T *argvars UNUSED, typval_T *rettv)
 {
+    static char buf[50];
 # ifdef FEAT_RELTIME
     proftime_T	tm;
 # endif
@@ -269,7 +270,15 @@ f_reltimestr(typval_T *argvars UNUSED, t
 	return;
 
     if (list2proftime(&argvars[0], &tm) == OK)
+    {
+# ifdef MSWIN
 	rettv->vval.v_string = vim_strsave((char_u *)profile_msg(&tm));
+# else
+	long usec = tm.tv_fsec / (TV_FSEC_SEC / 1000000);
+	sprintf(buf, "%3ld.%06ld", (long)tm.tv_sec, usec);
+	rettv->vval.v_string = vim_strsave((char_u *)buf);
+# endif
+    }
     else if (in_vim9script())
 	emsg(_(e_invalid_argument));
 # endif
@@ -392,7 +401,7 @@ static timer_T	*first_timer = NULL;
 static long	last_timer_id = 0;
 
 /*
- * Return time left until "due".  Negative if past "due".
+ * Return time left, in "msec", until "due".  Negative if past "due".
  */
     long
 proftime_time_left(proftime_T *due, proftime_T *now)
@@ -409,7 +418,7 @@ proftime_time_left(proftime_T *due, prof
     if (now->tv_sec > due->tv_sec)
 	return 0;
     return (due->tv_sec - now->tv_sec) * 1000
-	+ (due->tv_usec - now->tv_usec) / 1000;
+	+ (due->tv_fsec - now->tv_fsec) / (TV_FSEC_SEC / 1000);
 #  endif
 }
 
--- a/src/version.c
+++ b/src/version.c
@@ -696,6 +696,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1411,
+/**/
     1410,
 /**/
     1409,
--- a/src/vim.h
+++ b/src/vim.h
@@ -1869,8 +1869,27 @@ typedef void	    *vim_acl_T;		// dummy t
 #if (defined(FEAT_PROFILE) || defined(FEAT_RELTIME)) && !defined(PROTO)
 # ifdef MSWIN
 typedef LARGE_INTEGER proftime_T;
+#  define PROF_TIME_BLANK "           "
+#  define PROF_TOTALS_HEADER "count  total (s)   self (s)"
 # else
+   // Use tv_fsec for fraction of second (micro or nano) of proftime_T
+#  if defined(HAVE_TIMER_CREATE)
+typedef struct timespec proftime_T;
+#   define PROF_GET_TIME(tm) clock_gettime(CLOCK_MONOTONIC, tm)
+#   define tv_fsec tv_nsec
+#   define TV_FSEC_SEC 1000000000L
+#   define PROF_TIME_FORMAT "%3ld.%09ld"
+#   define PROF_TIME_BLANK "              "
+#   define PROF_TOTALS_HEADER "count     total (s)      self (s)"
+#  else
 typedef struct timeval proftime_T;
+#   define PROF_GET_TIME(tm) gettimeofday(tm, NULL)
+#   define tv_fsec tv_usec
+#   define TV_FSEC_SEC 1000000
+#   define PROF_TIME_FORMAT "%3ld.%06ld"
+#   define PROF_TIME_BLANK "           "
+#   define PROF_TOTALS_HEADER "count  total (s)   self (s)"
+#  endif
 # endif
 #else
 typedef int proftime_T;	    // dummy for function prototypes