Mercurial > vim
comparison src/charset.c @ 258:f93df7322443
updated for version 7.0070
author | vimboss |
---|---|
date | Sat, 23 Apr 2005 20:52:00 +0000 |
parents | 4707450c2b33 |
children | b3c0268f7815 |
comparison
equal
deleted
inserted
replaced
257:51a4d1c2a95b | 258:f93df7322443 |
---|---|
913 return mb_get_class(p) >= 2; | 913 return mb_get_class(p) >= 2; |
914 # endif | 914 # endif |
915 return (GET_CHARTAB(buf, *p) != 0); | 915 return (GET_CHARTAB(buf, *p) != 0); |
916 } | 916 } |
917 | 917 |
918 static char spell_chartab[256]; | 918 /* |
919 * The tables used for spelling. These are only used for the first 256 | |
920 * characters. | |
921 */ | |
922 typedef struct spelltab_S | |
923 { | |
924 char_u st_isw[256]; /* flags: is word char */ | |
925 char_u st_isu[256]; /* flags: is uppercase char */ | |
926 char_u st_fold[256]; /* chars: folded case */ | |
927 } spelltab_T; | |
928 | |
929 static spelltab_T spelltab; | |
930 static int did_set_spelltab; | |
931 | |
932 #define SPELL_ISWORD 1 | |
933 #define SPELL_ISUPPER 2 | |
934 | |
935 static void clear_spell_chartab __ARGS((spelltab_T *sp)); | |
936 static int set_spell_finish __ARGS((spelltab_T *new_st)); | |
937 | |
938 /* | |
939 * Init the chartab used for spelling for ASCII. | |
940 * EBCDIC is not supported! | |
941 */ | |
942 static void | |
943 clear_spell_chartab(sp) | |
944 spelltab_T *sp; | |
945 { | |
946 int i; | |
947 | |
948 /* Init everything to FALSE. */ | |
949 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw)); | |
950 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu)); | |
951 for (i = 0; i < 256; ++i) | |
952 sp->st_fold[i] = i; | |
953 | |
954 /* We include digits. A word shouldn't start with a digit, but handling | |
955 * that is done separately. */ | |
956 for (i = '0'; i <= '9'; ++i) | |
957 sp->st_isw[i] = TRUE; | |
958 for (i = 'A'; i <= 'Z'; ++i) | |
959 { | |
960 sp->st_isw[i] = TRUE; | |
961 sp->st_isu[i] = TRUE; | |
962 sp->st_fold[i] = i + 0x20; | |
963 } | |
964 for (i = 'a'; i <= 'z'; ++i) | |
965 sp->st_isw[i] = TRUE; | |
966 } | |
919 | 967 |
920 /* | 968 /* |
921 * Init the chartab used for spelling. Only depends on 'encoding'. | 969 * Init the chartab used for spelling. Only depends on 'encoding'. |
922 * Called once while starting up and when 'encoding' was changed. | 970 * Called once while starting up and when 'encoding' changes. |
923 * Unfortunately, we can't use isalpha() here, since the current locale may | 971 * The default is to use isalpha(), but the spell file should define the word |
924 * differ from 'encoding'. | 972 * characters to make it possible that 'encoding' differs from the current |
973 * locale. | |
925 */ | 974 */ |
926 void | 975 void |
927 init_spell_chartab() | 976 init_spell_chartab() |
928 { | 977 { |
929 int i; | 978 int i; |
930 | 979 |
931 /* ASCII is always the same, no matter what 'encoding' is used. | 980 did_set_spelltab = FALSE; |
932 * EBCDIC is not supported! */ | 981 clear_spell_chartab(&spelltab); |
933 for (i = 0; i < '0'; ++i) | 982 |
934 spell_chartab[i] = FALSE; | |
935 /* We include numbers. A word shouldn't start with a number, but handling | |
936 * that is done separately. */ | |
937 for ( ; i <= '9'; ++i) | |
938 spell_chartab[i] = TRUE; | |
939 for ( ; i < 'A'; ++i) | |
940 spell_chartab[i] = FALSE; | |
941 for ( ; i <= 'Z'; ++i) | |
942 spell_chartab[i] = TRUE; | |
943 for ( ; i < 'a'; ++i) | |
944 spell_chartab[i] = FALSE; | |
945 for ( ; i <= 'z'; ++i) | |
946 spell_chartab[i] = TRUE; | |
947 #ifdef FEAT_MBYTE | 983 #ifdef FEAT_MBYTE |
948 if (enc_dbcs) | 984 if (enc_dbcs) |
949 { | 985 { |
950 /* DBCS: assume double-wide characters are word characters. */ | 986 /* DBCS: assume double-wide characters are word characters. */ |
951 for ( ; i <= 255; ++i) | 987 for (i = 128; i <= 255; ++i) |
952 if (MB_BYTE2LEN(i) == 2) | 988 if (MB_BYTE2LEN(i) == 2) |
953 spell_chartab[i] = TRUE; | 989 spelltab.st_isw[i] = TRUE; |
954 else | |
955 spell_chartab[i] = FALSE; | |
956 } | |
957 else if (STRCMP(p_enc, "cp850") == 0) | |
958 #endif | |
959 #if defined(MSDOS) || defined(FEAT_MBYTE) | |
960 { | |
961 /* cp850, MS-DOS */ | |
962 for ( ; i < 128; ++i) | |
963 spell_chartab[i] = FALSE; | |
964 for ( ; i <= 0x9a; ++i) | |
965 spell_chartab[i] = TRUE; | |
966 for ( ; i < 0xa0; ++i) | |
967 spell_chartab[i] = FALSE; | |
968 for ( ; i <= 0xa5; ++i) | |
969 spell_chartab[i] = TRUE; | |
970 for ( ; i <= 255; ++i) | |
971 spell_chartab[i] = FALSE; | |
972 } | |
973 #endif | |
974 #ifdef FEAT_MBYTE | |
975 else if (STRCMP(p_enc, "iso-8859-2") == 0) | |
976 { | |
977 /* latin2 */ | |
978 for ( ; i <= 0xa0; ++i) | |
979 spell_chartab[i] = FALSE; | |
980 for ( ; i <= 255; ++i) | |
981 spell_chartab[i] = TRUE; | |
982 spell_chartab[0xa4] = FALSE; /* currency sign */ | |
983 spell_chartab[0xa7] = FALSE; /* paragraph sign */ | |
984 spell_chartab[0xad] = FALSE; /* dash */ | |
985 spell_chartab[0xb0] = FALSE; /* degrees */ | |
986 spell_chartab[0xf7] = FALSE; /* divide-by */ | |
987 } | 990 } |
988 else | 991 else |
989 #endif | 992 #endif |
990 #if defined(FEAT_MBYTE) || !defined(MSDOS) | 993 { |
991 { | 994 /* Rough guess: use isalpha() for characters above 128. */ |
992 /* Rough guess: anything we don't recognize assumes word characters | 995 for (i = 128; i < 256; ++i) |
993 * like latin1. */ | 996 { |
994 for ( ; i < 0xc0; ++i) | 997 spelltab.st_isw[i] = isalpha(i); |
995 spell_chartab[i] = FALSE; | 998 if (isupper(i)) |
996 for ( ; i <= 255; ++i) | 999 { |
997 spell_chartab[i] = TRUE; | 1000 spelltab.st_isu[i] = TRUE; |
998 # ifdef FEAT_MBYTE | 1001 spelltab.st_fold[i] = tolower(i); |
999 if (STRCMP(p_enc, "latin1") == 0) | 1002 } |
1000 # endif | 1003 } |
1001 spell_chartab[0xf7] = FALSE; /* divide-by */ | 1004 } |
1002 } | 1005 } |
1003 #endif | 1006 |
1004 } | 1007 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); |
1005 | 1008 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range"); |
1006 /* | 1009 |
1007 * Return TRUE if "p" points to a word character. | 1010 /* |
1008 * This only depends on 'encoding', not on 'iskeyword'. | 1011 * Set the spell character tables from strings in the affix file. |
1012 */ | |
1013 int | |
1014 set_spell_chartab(fol, low, upp) | |
1015 char_u *fol; | |
1016 char_u *low; | |
1017 char_u *upp; | |
1018 { | |
1019 /* We build the new tables here first, so that we can compare with the | |
1020 * previous one. */ | |
1021 spelltab_T new_st; | |
1022 char_u *pf = fol, *pl = low, *pu = upp; | |
1023 int f, l, u; | |
1024 | |
1025 clear_spell_chartab(&new_st); | |
1026 | |
1027 while (*pf != NUL) | |
1028 { | |
1029 if (*pl == NUL || *pu == NUL) | |
1030 { | |
1031 EMSG(_(e_affform)); | |
1032 return FAIL; | |
1033 } | |
1034 #ifdef FEAT_MBYTE | |
1035 f = mb_ptr2char_adv(&pf); | |
1036 l = mb_ptr2char_adv(&pl); | |
1037 u = mb_ptr2char_adv(&pu); | |
1038 #else | |
1039 f = *pf++; | |
1040 l = *pl++; | |
1041 u = *pu++; | |
1042 #endif | |
1043 /* Every character that appears is a word character. */ | |
1044 if (f < 256) | |
1045 new_st.st_isw[f] = TRUE; | |
1046 if (l < 256) | |
1047 new_st.st_isw[l] = TRUE; | |
1048 if (u < 256) | |
1049 new_st.st_isw[u] = TRUE; | |
1050 | |
1051 /* if "LOW" and "FOL" are not the same the "LOW" char needs | |
1052 * case-folding */ | |
1053 if (l < 256 && l != f) | |
1054 { | |
1055 if (f >= 256) | |
1056 { | |
1057 EMSG(_(e_affrange)); | |
1058 return FAIL; | |
1059 } | |
1060 new_st.st_fold[l] = f; | |
1061 } | |
1062 | |
1063 /* if "UPP" and "FOL" are not the same the "UPP" char needs | |
1064 * case-folding and it's upper case. */ | |
1065 if (u < 256 && u != f) | |
1066 { | |
1067 if (f >= 256) | |
1068 { | |
1069 EMSG(_(e_affrange)); | |
1070 return FAIL; | |
1071 } | |
1072 new_st.st_fold[u] = f; | |
1073 new_st.st_isu[u] = TRUE; | |
1074 } | |
1075 } | |
1076 | |
1077 if (*pl != NUL || *pu != NUL) | |
1078 { | |
1079 EMSG(_(e_affform)); | |
1080 return FAIL; | |
1081 } | |
1082 | |
1083 return set_spell_finish(&new_st); | |
1084 } | |
1085 | |
1086 /* | |
1087 * Set the spell character tables from strings in the .spl file. | |
1088 */ | |
1089 int | |
1090 set_spell_charflags(flags, cnt, upp) | |
1091 char_u *flags; | |
1092 int cnt; | |
1093 char_u *upp; | |
1094 { | |
1095 /* We build the new tables here first, so that we can compare with the | |
1096 * previous one. */ | |
1097 spelltab_T new_st; | |
1098 int i; | |
1099 char_u *p = upp; | |
1100 | |
1101 clear_spell_chartab(&new_st); | |
1102 | |
1103 for (i = 0; i < cnt; ++i) | |
1104 { | |
1105 new_st.st_isw[i + 128] = (flags[i] & SPELL_ISWORD) != 0; | |
1106 new_st.st_isu[i + 128] = (flags[i] & SPELL_ISUPPER) != 0; | |
1107 | |
1108 if (*p == NUL) | |
1109 return FAIL; | |
1110 #ifdef FEAT_MBYTE | |
1111 new_st.st_fold[i + 128] = mb_ptr2char_adv(&p); | |
1112 #else | |
1113 new_st.st_fold[i + 128] = *p++; | |
1114 #endif | |
1115 } | |
1116 | |
1117 return set_spell_finish(&new_st); | |
1118 } | |
1119 | |
1120 static int | |
1121 set_spell_finish(new_st) | |
1122 spelltab_T *new_st; | |
1123 { | |
1124 int i; | |
1125 | |
1126 if (did_set_spelltab) | |
1127 { | |
1128 /* check that it's the same table */ | |
1129 for (i = 0; i < 256; ++i) | |
1130 { | |
1131 if (spelltab.st_isw[i] != new_st->st_isw[i] | |
1132 || spelltab.st_isu[i] != new_st->st_isu[i] | |
1133 || spelltab.st_fold[i] != new_st->st_fold[i]) | |
1134 { | |
1135 EMSG(_("E763: Word characters differ between spell files")); | |
1136 return FAIL; | |
1137 } | |
1138 } | |
1139 } | |
1140 else | |
1141 { | |
1142 /* copy the new spelltab into the one being used */ | |
1143 spelltab = *new_st; | |
1144 did_set_spelltab = TRUE; | |
1145 } | |
1146 | |
1147 return OK; | |
1148 } | |
1149 | |
1150 #if defined(FEAT_MBYTE) || defined(PROTO) | |
1151 /* | |
1152 * Write the current tables into the .spl file. | |
1153 */ | |
1154 void | |
1155 write_spell_chartab(fd) | |
1156 FILE *fd; | |
1157 { | |
1158 char_u charbuf[256 * 4]; | |
1159 int len = 0; | |
1160 int flags; | |
1161 int i; | |
1162 | |
1163 if (!did_set_spelltab) | |
1164 { | |
1165 /* No character table specified, write zero counts. */ | |
1166 fputc(0, fd); | |
1167 fputc(0, fd); | |
1168 fputc(0, fd); | |
1169 return; | |
1170 } | |
1171 | |
1172 fputc(128, fd); /* <charflagslen> */ | |
1173 for (i = 128; i < 256; ++i) | |
1174 { | |
1175 flags = 0; | |
1176 if (spelltab.st_isw[i]) | |
1177 flags |= SPELL_ISWORD; | |
1178 if (spelltab.st_isu[i]) | |
1179 flags |= SPELL_ISUPPER; | |
1180 fputc(flags, fd); /* <charflags> */ | |
1181 | |
1182 len += mb_char2bytes(spelltab.st_fold[i], charbuf + len); | |
1183 } | |
1184 | |
1185 put_bytes(fd, (long_u)len, 2); /* <fcharlen> */ | |
1186 fwrite(charbuf, (size_t)len, (size_t)1, fd); /* <fchars> */ | |
1187 } | |
1188 #endif | |
1189 | |
1190 /* | |
1191 * Return TRUE if "p" points to a word character for spelling. | |
1009 */ | 1192 */ |
1010 int | 1193 int |
1011 spell_iswordc(p) | 1194 spell_iswordc(p) |
1012 char_u *p; | 1195 char_u *p; |
1013 { | 1196 { |
1014 # ifdef FEAT_MBYTE | 1197 # ifdef FEAT_MBYTE |
1015 if (has_mbyte && MB_BYTE2LEN(*p) > 1) | 1198 if (has_mbyte && MB_BYTE2LEN(*p) > 1) |
1016 return mb_get_class(p) >= 2; | 1199 return mb_get_class(p) >= 2; |
1017 # endif | 1200 # endif |
1018 return spell_chartab[*p]; | 1201 return spelltab.st_isw[*p]; |
1019 } | 1202 } |
1020 #endif | 1203 |
1204 /* | |
1205 * Return TRUE if "c" is an upper-case character for spelling. | |
1206 */ | |
1207 int | |
1208 spell_isupper(c) | |
1209 int c; | |
1210 { | |
1211 # ifdef FEAT_MBYTE | |
1212 if (enc_utf8) | |
1213 { | |
1214 /* For Unicode we can call utf_isupper(), but don't do that for ASCII, | |
1215 * because we don't want to use 'casemap' here. */ | |
1216 if (c >= 128) | |
1217 return utf_isupper(c); | |
1218 } | |
1219 else if (has_mbyte && c > 256) | |
1220 { | |
1221 /* For characters above 255 we don't have something specified. | |
1222 * Fall back to locale-dependent iswupper(). If not available | |
1223 * simply return FALSE. */ | |
1224 # ifdef HAVE_ISWUPPER | |
1225 return iswupper(c); | |
1226 # else | |
1227 return FALSE; | |
1228 # endif | |
1229 } | |
1230 # endif | |
1231 return spelltab.st_isu[c]; | |
1232 } | |
1233 | |
1234 /* | |
1235 * case-fold "p[len]" into "buf[buflen]". Used for spell checking. | |
1236 * Returns FAIL when something is wrong. | |
1237 */ | |
1238 int | |
1239 spell_casefold(p, len, buf, buflen) | |
1240 char_u *p; | |
1241 int len; | |
1242 char_u *buf; | |
1243 int buflen; | |
1244 { | |
1245 int i; | |
1246 | |
1247 if (len >= buflen) | |
1248 { | |
1249 buf[0] = NUL; | |
1250 return FAIL; /* result will not fit */ | |
1251 } | |
1252 | |
1253 #ifdef FEAT_MBYTE | |
1254 if (has_mbyte) | |
1255 { | |
1256 int c; | |
1257 int outi = 0; | |
1258 | |
1259 /* Fold one character at a time. */ | |
1260 for (i = 0; i < len; i += mb_ptr2len_check(p + i)) | |
1261 { | |
1262 c = mb_ptr2char(p + i); | |
1263 if (enc_utf8) | |
1264 /* For Unicode case folding is always the same, no need to use | |
1265 * the table from the spell file. */ | |
1266 c = utf_fold(c); | |
1267 else if (c < 256) | |
1268 /* Use the table from the spell file. */ | |
1269 c = spelltab.st_fold[c]; | |
1270 # ifdef HAVE_TOWLOWER | |
1271 else | |
1272 /* We don't know what to do, fall back to towlower(), it | |
1273 * depends on the current locale. */ | |
1274 c = towlower(c); | |
1275 # endif | |
1276 if (outi + MB_MAXBYTES > buflen) | |
1277 { | |
1278 buf[outi] = NUL; | |
1279 return FAIL; | |
1280 } | |
1281 outi += mb_char2bytes(c, buf + outi); | |
1282 } | |
1283 buf[outi] = NUL; | |
1284 } | |
1285 else | |
1286 #endif | |
1287 { | |
1288 /* Be quick for non-multibyte encodings. */ | |
1289 for (i = 0; i < len; ++i) | |
1290 buf[i] = spelltab.st_fold[p[i]]; | |
1291 buf[i] = NUL; | |
1292 } | |
1293 | |
1294 return OK; | |
1295 } | |
1296 | |
1297 #endif /* FEAT_SYN_HL */ | |
1021 | 1298 |
1022 /* | 1299 /* |
1023 * return TRUE if 'c' is a valid file-name character | 1300 * return TRUE if 'c' is a valid file-name character |
1024 * Assume characters above 0x100 are valid (multi-byte). | 1301 * Assume characters above 0x100 are valid (multi-byte). |
1025 */ | 1302 */ |