vim: src/charset.c comparison

comparison src/charset.c @ 258:f93df7322443

updated for version 7.0070

author	vimboss
date	Sat, 23 Apr 2005 20:52:00 +0000
parents	4707450c2b33
children	b3c0268f7815

comparison

equal deleted inserted replaced

-:51a4d1c2a95b
+:f93df7322443
 	return mb_get_class(p) >= 2;
 # endif
 return (GET_CHARTAB(buf, *p) != 0);
 }
-static char spell_chartab[256];
+/*
+* The tables used for spelling.  These are only used for the first 256
+* characters.
+*/
+typedef struct spelltab_S
+{
+char_u  st_isw[256];	/* flags: is word char */
+char_u  st_isu[256];	/* flags: is uppercase char */
+char_u  st_fold[256];	/* chars: folded case */
+} spelltab_T;
+static spelltab_T   spelltab;
+static int	    did_set_spelltab;
+#define SPELL_ISWORD	1
+#define SPELL_ISUPPER	2
+static void clear_spell_chartab __ARGS((spelltab_T *sp));
+static int set_spell_finish __ARGS((spelltab_T	*new_st));
+/*
+* Init the chartab used for spelling for ASCII.
+* EBCDIC is not supported!
+*/
+static void
+clear_spell_chartab(sp)
+spelltab_T	*sp;
+{
+int	    i;
+/* Init everything to FALSE. */
+vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
+vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
+for (i = 0; i < 256; ++i)
+	sp->st_fold[i] = i;
+/* We include digits.  A word shouldn't start with a digit, but handling
+* that is done separately. */
+for (i = '0'; i <= '9'; ++i)
+	sp->st_isw[i] = TRUE;
+for (i = 'A'; i <= 'Z'; ++i)
+{
+	sp->st_isw[i] = TRUE;
+	sp->st_isu[i] = TRUE;
+	sp->st_fold[i] = i + 0x20;
+}
+for (i = 'a'; i <= 'z'; ++i)
+	sp->st_isw[i] = TRUE;
+}
 /*
 * Init the chartab used for spelling.  Only depends on 'encoding'.
-* Called once while starting up and when 'encoding' was changed.
+* Called once while starting up and when 'encoding' changes.
-* Unfortunately, we can't use isalpha() here, since the current locale may
+* The default is to use isalpha(), but the spell file should define the word
-* differ from 'encoding'.
+* characters to make it possible that 'encoding' differs from the current
+* locale.
 */
 void
 init_spell_chartab()
 {
 int	    i;
-/* ASCII is always the same, no matter what 'encoding' is used.
+did_set_spelltab = FALSE;
-* EBCDIC is not supported! */
+clear_spell_chartab(&spelltab);
-for (i = 0; i < '0'; ++i)
-	spell_chartab[i] = FALSE;
-/* We include numbers.  A word shouldn't start with a number, but handling
-* that is done separately. */
-for ( ; i <= '9'; ++i)
-	spell_chartab[i] = TRUE;
-for ( ; i < 'A'; ++i)
-	spell_chartab[i] = FALSE;
-for ( ; i <= 'Z'; ++i)
-	spell_chartab[i] = TRUE;
-for ( ; i < 'a'; ++i)
-	spell_chartab[i] = FALSE;
-for ( ; i <= 'z'; ++i)
-	spell_chartab[i] = TRUE;
 #ifdef FEAT_MBYTE
 if (enc_dbcs)
 {
 	/* DBCS: assume double-wide characters are word characters. */
-	for ( ; i <= 255; ++i)
+	for (i = 128; i <= 255; ++i)
 	    if (MB_BYTE2LEN(i) == 2)
-		spell_chartab[i] = TRUE;
+		spelltab.st_isw[i] = TRUE;
-	    else
-		spell_chartab[i] = FALSE;
-}
-else if (STRCMP(p_enc, "cp850") == 0)
-#endif
-#if defined(MSDOS) || defined(FEAT_MBYTE)
-{
-	/* cp850, MS-DOS */
-	for ( ; i < 128; ++i)
-	    spell_chartab[i] = FALSE;
-	for ( ; i <= 0x9a; ++i)
-	    spell_chartab[i] = TRUE;
-	for ( ; i < 0xa0; ++i)
-	    spell_chartab[i] = FALSE;
-	for ( ; i <= 0xa5; ++i)
-	    spell_chartab[i] = TRUE;
-	for ( ; i <= 255; ++i)
-	    spell_chartab[i] = FALSE;
-}
-#endif
-#ifdef FEAT_MBYTE
-else if (STRCMP(p_enc, "iso-8859-2") == 0)
-{
-	/* latin2 */
-	for ( ; i <= 0xa0; ++i)
-	    spell_chartab[i] = FALSE;
-	for ( ; i <= 255; ++i)
-	    spell_chartab[i] = TRUE;
-	spell_chartab[0xa4] = FALSE;	    /* currency sign */
-	spell_chartab[0xa7] = FALSE;	    /* paragraph sign */
-	spell_chartab[0xad] = FALSE;	    /* dash */
-	spell_chartab[0xb0] = FALSE;	    /* degrees */
-	spell_chartab[0xf7] = FALSE;	    /* divide-by */
 }
 else
 #endif
-#if defined(FEAT_MBYTE) || !defined(MSDOS)
+{
-{
+	/* Rough guess: use isalpha() for characters above 128. */
-	/* Rough guess: anything we don't recognize assumes word characters
+	for (i = 128; i < 256; ++i)
-	 * like latin1. */
+	{
-	for ( ; i < 0xc0; ++i)
+	    spelltab.st_isw[i] = isalpha(i);
-	    spell_chartab[i] = FALSE;
+	    if (isupper(i))
-	for ( ; i <= 255; ++i)
+	    {
-	    spell_chartab[i] = TRUE;
+		spelltab.st_isu[i] = TRUE;
-# ifdef FEAT_MBYTE
+		spelltab.st_fold[i] = tolower(i);
-	if (STRCMP(p_enc, "latin1") == 0)
+	    }
-# endif
+	}
-	    spell_chartab[0xf7] = FALSE;	    /* divide-by */
+}
 }
-#endif
-}
+static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
+static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
-/*
-* Return TRUE if "p" points to a word character.
+/*
-* This only depends on 'encoding', not on 'iskeyword'.
+* Set the spell character tables from strings in the affix file.
+*/
+int
+set_spell_chartab(fol, low, upp)
+char_u	*fol;
+char_u	*low;
+char_u	*upp;
+{
+/* We build the new tables here first, so that we can compare with the
+* previous one. */
+spelltab_T	new_st;
+char_u	*pf = fol, *pl = low, *pu = upp;
+int		f, l, u;
+clear_spell_chartab(&new_st);
+while (*pf != NUL)
+{
+	if (*pl == NUL || *pu == NUL)
+	{
+	    EMSG(_(e_affform));
+	    return FAIL;
+	}
+#ifdef FEAT_MBYTE
+	f = mb_ptr2char_adv(&pf);
+	l = mb_ptr2char_adv(&pl);
+	u = mb_ptr2char_adv(&pu);
+#else
+	f = *pf++;
+	l = *pl++;
+	u = *pu++;
+#endif
+	/* Every character that appears is a word character. */
+	if (f < 256)
+	    new_st.st_isw[f] = TRUE;
+	if (l < 256)
+	    new_st.st_isw[l] = TRUE;
+	if (u < 256)
+	    new_st.st_isw[u] = TRUE;
+	/* if "LOW" and "FOL" are not the same the "LOW" char needs
+	 * case-folding */
+	if (l < 256 && l != f)
+	{
+	    if (f >= 256)
+	    {
+		EMSG(_(e_affrange));
+		return FAIL;
+	    }
+	    new_st.st_fold[l] = f;
+	}
+	/* if "UPP" and "FOL" are not the same the "UPP" char needs
+	 * case-folding and it's upper case. */
+	if (u < 256 && u != f)
+	{
+	    if (f >= 256)
+	    {
+		EMSG(_(e_affrange));
+		return FAIL;
+	    }
+	    new_st.st_fold[u] = f;
+	    new_st.st_isu[u] = TRUE;
+	}
+}
+if (*pl != NUL || *pu != NUL)
+{
+	EMSG(_(e_affform));
+	return FAIL;
+}
+return set_spell_finish(&new_st);
+}
+/*
+* Set the spell character tables from strings in the .spl file.
+*/
+int
+set_spell_charflags(flags, cnt, upp)
+char_u	*flags;
+int		cnt;
+char_u	*upp;
+{
+/* We build the new tables here first, so that we can compare with the
+* previous one. */
+spelltab_T	new_st;
+int		i;
+char_u	*p = upp;
+clear_spell_chartab(&new_st);
+for (i = 0; i < cnt; ++i)
+{
+	new_st.st_isw[i + 128] = (flags[i] & SPELL_ISWORD) != 0;
+	new_st.st_isu[i + 128] = (flags[i] & SPELL_ISUPPER) != 0;
+	if (*p == NUL)
+	    return FAIL;
+#ifdef FEAT_MBYTE
+	new_st.st_fold[i + 128] = mb_ptr2char_adv(&p);
+#else
+	new_st.st_fold[i + 128] = *p++;
+#endif
+}
+return set_spell_finish(&new_st);
+}
+static int
+set_spell_finish(new_st)
+spelltab_T	*new_st;
+{
+int		i;
+if (did_set_spelltab)
+{
+	/* check that it's the same table */
+	for (i = 0; i < 256; ++i)
+	{
+	    if (spelltab.st_isw[i] != new_st->st_isw[i]
+		    || spelltab.st_isu[i] != new_st->st_isu[i]
+		    || spelltab.st_fold[i] != new_st->st_fold[i])
+	    {
+		EMSG(_("E763: Word characters differ between spell files"));
+		return FAIL;
+	    }
+	}
+}
+else
+{
+	/* copy the new spelltab into the one being used */
+	spelltab = *new_st;
+	did_set_spelltab = TRUE;
+}
+return OK;
+}
+#if defined(FEAT_MBYTE) || defined(PROTO)
+/*
+* Write the current tables into the .spl file.
+*/
+void
+write_spell_chartab(fd)
+FILE	*fd;
+{
+char_u	charbuf[256 * 4];
+int		len = 0;
+int		flags;
+int		i;
+if (!did_set_spelltab)
+{
+	/* No character table specified, write zero counts. */
+	fputc(0, fd);
+	fputc(0, fd);
+	fputc(0, fd);
+	return;
+}
+fputc(128, fd);				    /* <charflagslen> */
+for (i = 128; i < 256; ++i)
+{
+	flags = 0;
+	if (spelltab.st_isw[i])
+	    flags |= SPELL_ISWORD;
+	if (spelltab.st_isu[i])
+	    flags |= SPELL_ISUPPER;
+	fputc(flags, fd);			    /* <charflags> */
+	len += mb_char2bytes(spelltab.st_fold[i], charbuf + len);
+}
+put_bytes(fd, (long_u)len, 2);		    /* <fcharlen> */
+fwrite(charbuf, (size_t)len, (size_t)1, fd);    /* <fchars> */
+}
+#endif
+/*
+* Return TRUE if "p" points to a word character for spelling.
 */
 int
 spell_iswordc(p)
 char_u *p;
 {
 # ifdef FEAT_MBYTE
 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
 	return mb_get_class(p) >= 2;
 # endif
-return spell_chartab[*p];
+return spelltab.st_isw[*p];
 }
-#endif
+/*
+* Return TRUE if "c" is an upper-case character for spelling.
+*/
+int
+spell_isupper(c)
+int		c;
+{
+# ifdef FEAT_MBYTE
+if (enc_utf8)
+{
+	/* For Unicode we can call utf_isupper(), but don't do that for ASCII,
+	 * because we don't want to use 'casemap' here. */
+	if (c >= 128)
+	    return utf_isupper(c);
+}
+else if (has_mbyte && c > 256)
+{
+	/* For characters above 255 we don't have something specfied.
+	 * Fall back to locale-dependent iswupper().  If not available
+	 * simply return FALSE. */
+#  ifdef HAVE_ISWUPPER
+	return iswupper(c);
+#  else
+	return FALSE;
+#  endif
+}
+# endif
+return spelltab.st_isu[c];
+}
+/*
+* case-fold "p[len]" into "buf[buflen]".  Used for spell checking.
+* Returns FAIL when something wrong.
+*/
+int
+spell_casefold(p, len, buf, buflen)
+char_u	*p;
+int		len;
+char_u	*buf;
+int		buflen;
+{
+int		i;
+if (len >= buflen)
+{
+	buf[0] = NUL;
+	return FAIL;		/* result will not fit */
+}
+#ifdef FEAT_MBYTE
+if (has_mbyte)
+{
+	int	c;
+	int	outi = 0;
+	/* Fold one character at a time. */
+	for (i = 0; i < len; i += mb_ptr2len_check(p + i))
+	{
+	    c = mb_ptr2char(p + i);
+	    if (enc_utf8)
+		/* For Unicode case folding is always the same, no need to use
+		 * the table from the spell file. */
+		c = utf_fold(c);
+	    else if (c < 256)
+		/* Use the table from the spell file. */
+		c = spelltab.st_fold[c];
+# ifdef HAVE_TOWLOWER
+	    else
+		/* We don't know what to do, fall back to towlower(), it
+		 * depends on the current locale. */
+		c = towlower(c);
+# endif
+	    if (outi + MB_MAXBYTES > buflen)
+	    {
+		buf[outi] = NUL;
+		return FAIL;
+	    }
+	    outi += mb_char2bytes(c, buf + outi);
+	}
+	buf[outi] = NUL;
+}
+else
+#endif
+{
+	/* Be quick for non-multibyte encodings. */
+	for (i = 0; i < len; ++i)
+	    buf[i] = spelltab.st_fold[p[i]];
+	buf[i] = NUL;
+}
+return OK;
+}
+#endif /* FEAT_SYN_HL */
 /*
 * return TRUE if 'c' is a valid file-name character
 * Assume characters above 0x100 are valid (multi-byte).
 */

Mercurial > vim

comparison src/charset.c @ 258:f93df7322443