changeset 33039:21f2838a4dd9 v9.0.1810

patch 9.0.1810: camel-case spelling has issues with digits Commit: https://github.com/vim/vim/commit/d08745040bb82c5e9a81b6c8a414e50951642492 Author: LemonBoy <thatlemon@gmail.com> Date: Sun Aug 27 21:52:27 2023 +0200 patch 9.0.1810: camel-case spelling has issues with digits Problem: camel-case spelling has issues with digits Solution: Improve the camelCase spell checking by taking digits and caps into account Rewrite the conditions to check for word boundaries by taking into account the presence of digits and all-caps sequences such as acronyms. closes: #12644 closes: #12933 Signed-off-by: Christian Brabandt <cb@256bit.org> Co-authored-by: LemonBoy <thatlemon@gmail.com>
author Christian Brabandt <cb@256bit.org>
date Sun, 27 Aug 2023 22:00:06 +0200
parents 3b797830f9fd
children a5279c37a1f5
files src/spell.c src/testdir/test_spell.vim src/version.c
diffstat 3 files changed, 105 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/src/spell.c
+++ b/src/spell.c
@@ -125,6 +125,11 @@ static int spell_mb_isword_class(int cl,
 #define FIND_COMPOUND	    3	// find case-folded compound word
 #define FIND_KEEPCOMPOUND   4	// find keep-case compound word
 
+// type values for get_char_type
+#define CHAR_OTHER	0
+#define CHAR_UPPER	1
+#define CHAR_DIGIT	2
+
 static void find_word(matchinf_T *mip, int mode);
 static void find_prefix(matchinf_T *mip, int mode);
 static int fold_more(matchinf_T *mip);
@@ -138,6 +143,7 @@ static void spell_soundfold_sal(slang_T 
 static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res);
 static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum);
 static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum);
+static char_u *advance_camelcase_word(char_u *p, win_T *wp, int *is_camel_case);
 
 /*
  * Main spell-checking function.
@@ -170,7 +176,7 @@ spell_check(
     int		lpi;
     int		count_word = docount;
     int		use_camel_case = *wp->w_s->b_p_spo != NUL;
-    int		camel_case = 0;
+    int		is_camel_case = FALSE;
 
     // A word never starts at a space or a control character.  Return quickly
     // then, skipping over the character.
@@ -202,27 +208,15 @@ spell_check(
     mi.mi_fend = ptr;
     if (spell_iswordp(mi.mi_fend, wp))
     {
-	int prev_upper;
-	int this_upper = FALSE;  // init for gcc
-
 	if (use_camel_case)
-	{
-	    c = PTR2CHAR(mi.mi_fend);
-	    this_upper = SPELL_ISUPPER(c);
-	}
-
-	do
+	    mi.mi_fend = advance_camelcase_word(ptr, wp, &is_camel_case);
+	else
 	{
-	    MB_PTR_ADV(mi.mi_fend);
-	    if (use_camel_case)
+	    do
 	    {
-		prev_upper = this_upper;
-		c = PTR2CHAR(mi.mi_fend);
-		this_upper = SPELL_ISUPPER(c);
-		camel_case = !prev_upper && this_upper;
-	    }
-	} while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp)
-							       && !camel_case);
+		MB_PTR_ADV(mi.mi_fend);
+	    } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp));
+	}
 
 	if (capcol != NULL && *capcol == 0 && wp->w_s->b_cap_prog != NULL)
 	{
@@ -253,7 +247,7 @@ spell_check(
 							     MAXWLEN + 1);
     mi.mi_fwordlen = (int)STRLEN(mi.mi_fword);
 
-    if (camel_case && mi.mi_fwordlen > 0)
+    if (is_camel_case && mi.mi_fwordlen > 0)
 	// Introduce a fake word end space into the folded word.
 	mi.mi_fword[mi.mi_fwordlen - 1] = ' ';
 
@@ -388,6 +382,75 @@ spell_check(
 }
 
 /*
+ * Classify "c" as CHAR_DIGIT, CHAR_UPPER or CHAR_OTHER (digit is checked first).
+ */
+    static int
+get_char_type(int c)
+{
+    if (VIM_ISDIGIT(c))
+	return CHAR_DIGIT;
+    if (SPELL_ISUPPER(c))
+	return CHAR_UPPER;
+    return CHAR_OTHER;
+}
+
+/*
+ * Return a pointer to the end of the word starting at "str", stopping early at
+ * a camelCase or digit boundary; "*is_camel_case" is set to TRUE in that case.
+ */
+    static char_u *
+advance_camelcase_word(char_u *str, win_T *wp, int *is_camel_case)
+{
+    int last_type, last_last_type, this_type;
+    int c;
+    char_u *end = str;
+
+    *is_camel_case = FALSE;
+
+    if (*str == NUL)
+	return str;
+
+    c = PTR2CHAR(end);
+    MB_PTR_ADV(end);
+    // We need at most the types of the last two chars.
+    last_last_type = -1;
+    last_type = get_char_type(c);
+
+    while (*end != NUL && spell_iswordp(end, wp))
+    {
+	c = PTR2CHAR(end);
+	this_type = get_char_type(c);
+
+	if (last_last_type == CHAR_UPPER && last_type == CHAR_UPPER
+		&& this_type == CHAR_OTHER)
+	{
+	    // UpperUpperLower, e.g. an acronym followed by a capitalized word:
+	    // the last upper char begins the next word.
+	    *is_camel_case = TRUE;
+	    // Back up by one char.
+	    MB_PTR_BACK(str, end);
+	    break;
+	}
+	else if ((this_type == CHAR_UPPER && last_type == CHAR_OTHER)
+		|| (this_type != last_type
+		    && (this_type == CHAR_DIGIT || last_type == CHAR_DIGIT)))
+	{
+	    // Boundary before this char ("Lower" means CHAR_OTHER):
+	    // LowerUpper LowerDigit UpperDigit DigitUpper DigitLower
+	    *is_camel_case = TRUE;
+	    break;
+	}
+
+	last_last_type = last_type;
+	last_type = this_type;
+
+	MB_PTR_ADV(end);
+    }
+
+    return end;
+}
+
+/*
  * Check if the word at "mip->mi_word" is in the tree.
  * When "mode" is FIND_FOLDWORD check in fold-case word tree.
  * When "mode" is FIND_KEEPWORD check in keep-case word tree.
--- a/src/testdir/test_spell.vim
+++ b/src/testdir/test_spell.vim
@@ -132,6 +132,26 @@ foobar/?
   set spell&
 endfunc
 
+func Test_spell_camelcase()
+  set spell spelloptions=camel
+  let words = [
+      \ 'UPPER',
+      \ 'lower',
+      \ 'mixedCase',
+      \ 'HTML',
+      \ 'XMLHttpRequest',
+      \ 'foo123bar',
+      \ '12345678',
+      \ 'HELLO123world',
+      \]
+
+  for word in words
+    call assert_equal(['', ''],  spellbadword(word))
+  endfor
+
+  set spell& spelloptions&
+endfunc
+
 func Test_spell_file_missing()
   let s:spell_file_missing = 0
   augroup TestSpellFileMissing
--- a/src/version.c
+++ b/src/version.c
@@ -700,6 +700,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1810,
+/**/
     1809,
 /**/
     1808,