Mercurial > vim
annotate src/charset.c @ 4001:d9b4cf53be2e v7.3.755
updated for version 7.3.755
Problem: Autoconf doesn't find Python 3 if it's called "python".
Solution: Search for "python2" and "python3" first, then "python".
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Wed, 12 Dec 2012 14:25:05 +0100 |
parents | a17918b76ca1 |
children | 80b041b994d1 |
rev | line source |
---|---|
7 | 1 /* vi:set ts=8 sts=4 sw=4: |
2 * | |
3 * VIM - Vi IMproved by Bram Moolenaar | |
4 * | |
5 * Do ":help uganda" in Vim to read copying and usage conditions. | |
6 * Do ":help credits" in Vim to see a list of people who contributed. | |
7 * See README.txt for an overview of the Vim source code. | |
8 */ | |
9 | |
10 #include "vim.h" | |
11 | |
12 #ifdef FEAT_LINEBREAK | |
13 static int win_chartabsize __ARGS((win_T *wp, char_u *p, colnr_T col)); | |
14 #endif | |
15 | |
16 #ifdef FEAT_MBYTE | |
2541
8a156630208b
Include wchar.h in charset.c for towupper().
Bram Moolenaar <bram@vim.org>
parents:
2339
diff
changeset
|
17 # if defined(HAVE_WCHAR_H) |
8a156630208b
Include wchar.h in charset.c for towupper().
Bram Moolenaar <bram@vim.org>
parents:
2339
diff
changeset
|
18 # include <wchar.h> /* for towupper() and towlower() */ |
8a156630208b
Include wchar.h in charset.c for towupper().
Bram Moolenaar <bram@vim.org>
parents:
2339
diff
changeset
|
19 # endif |
7 | 20 static int win_nolbr_chartabsize __ARGS((win_T *wp, char_u *s, colnr_T col, int *headp)); |
21 #endif | |
22 | |
1869 | 23 static unsigned nr2hex __ARGS((unsigned c)); |
7 | 24 |
25 static int chartab_initialized = FALSE; | |
26 | |
27 /* b_chartab[] is an array of 32 bytes, each bit representing one of the | |
28 * characters 0-255. */ | |
/* The byte index is "c >> 3" and the bit inside that byte is "c & 0x7".
 * SET_CHARTAB and RESET_CHARTAB expand to an assignment statement that sets
 * or clears that bit.  GET_CHARTAB yields the masked bit: non-zero when the
 * bit is set, but not necessarily 1, so compare it against zero. */
29 #define SET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] |= (1 << ((c) & 0x7)) | |
30 #define RESET_CHARTAB(buf, c) (buf)->b_chartab[(unsigned)(c) >> 3] &= ~(1 << ((c) & 0x7)) | |
31 #define GET_CHARTAB(buf, c) ((buf)->b_chartab[(unsigned)(c) >> 3] & (1 << ((c) & 0x7))) | |
32 | |
33 /* | |
34 * Fill chartab[]. Also fills curbuf->b_chartab[] with flags for keyword | |
35 * characters for current buffer. | |
36 * | |
37 * Depends on the option settings 'iskeyword', 'isident', 'isfname', | |
38 * 'isprint' and 'encoding'. | |
39 * | |
40 * The index in chartab[] depends on 'encoding': | |
41 * - For non-multi-byte index with the byte (same as the character). | |
42 * - For DBCS index with the first byte. | |
43 * - For UTF-8 index with the character (when first byte is up to 0x80 it is | |
44 * the same as the character, if the first byte is 0x80 and above it depends | |
45 * on further bytes). | |
46 * | |
47 * The contents of chartab[]: | |
48 * - The lower two bits, masked by CT_CELL_MASK, give the number of display | |
49 * cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80. | |
50 * - CT_PRINT_CHAR bit is set when the character is printable (no need to | |
51 * translate the character before displaying it). Note that only DBCS | |
52 * characters can have 2 display cells and still be printable. | |
53 * - CT_FNAME_CHAR bit is set when the character can be in a file name. | |
54 * - CT_ID_CHAR bit is set when the character can be in an identifier. | |
55 * | |
56 * Return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has an | |
57 * error, OK otherwise. | |
58 */ | |
59 int | |
60 init_chartab() | |
61 { | |
62 return buf_init_chartab(curbuf, TRUE); | |
63 } | |
64 | |
/*
 * Fill buf->b_chartab[] with the keyword flags taken from buf->b_p_isk.
 * When "global" is TRUE, chartab[] is first reset to its defaults and then
 * updated from 'isident', 'isprint' and 'isfname' as well.
 *
 * Returns FAIL as soon as an option item cannot be accepted: a character
 * outside 1-255, a range whose end is below its start, or an item that is
 * not followed by a comma or the end of the option.  Returns OK otherwise,
 * after setting chartab_initialized.
 */
65 int | |
66 buf_init_chartab(buf, global) | |
67 buf_T *buf; | |
68 int global; /* FALSE: only set buf->b_chartab[] */ | |
69 { | |
70 int c; /* current character, or start of a range */ | |
71 int c2; /* end of a range, -1 when the item is not a range */ | |
72 char_u *p; /* walks through the option value being parsed */ | |
73 int i; /* which option: 0 isident, 1 isprint, 2 isfname, 3 iskeyword */ | |
74 int tilde; /* TRUE when the item starts with '^': clear instead of set */ | |
75 int do_isalpha; /* TRUE for a lone '@': apply to alphabetic chars only */ | |
76 | |
77 if (global) | |
78 { | |
79 /* | |
80 * Set the default size for printable characters: | |
81 * From <Space> to '~' is 1 (printable), others are 2 (not printable). | |
82 * This also inits all 'isident' and 'isfname' flags to FALSE. | |
83 * | |
84 * EBCDIC: all chars below ' ' are not printable, all others are | |
85 * printable. | |
86 */ | |
87 c = 0; | |
88 while (c < ' ') | |
89 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2; | |
90 #ifdef EBCDIC | |
91 while (c < 255) | |
92 #else | |
93 while (c <= '~') | |
94 #endif | |
95 chartab[c++] = 1 + CT_PRINT_CHAR; | |
96 #ifdef FEAT_FKMAP | |
97 if (p_altkeymap) | |
98 { | |
99 while (c < YE) | |
100 chartab[c++] = 1 + CT_PRINT_CHAR; | |
101 } | |
102 #endif | |
/* Whatever is left up to 255 gets its default below. */
103 while (c < 256) | |
104 { | |
105 #ifdef FEAT_MBYTE | |
106 /* UTF-8: bytes 0xa0 - 0xff are printable (latin1) */ | |
107 if (enc_utf8 && c >= 0xa0) | |
108 chartab[c++] = CT_PRINT_CHAR + 1; | |
109 /* euc-jp characters starting with 0x8e are single width */ | |
110 else if (enc_dbcs == DBCS_JPNU && c == 0x8e) | |
111 chartab[c++] = CT_PRINT_CHAR + 1; | |
112 /* other double-byte chars can be printable AND double-width */ | |
113 else if (enc_dbcs != 0 && MB_BYTE2LEN(c) == 2) | |
114 chartab[c++] = CT_PRINT_CHAR + 2; | |
115 else | |
116 #endif | |
117 /* the rest is unprintable by default */ | |
118 chartab[c++] = (dy_flags & DY_UHEX) ? 4 : 2; | |
119 } | |
120 | |
121 #ifdef FEAT_MBYTE | |
122 /* Assume that every multi-byte char is a filename character. */ | |
123 for (c = 1; c < 256; ++c) | |
124 if ((enc_dbcs != 0 && MB_BYTE2LEN(c) > 1) | |
125 || (enc_dbcs == DBCS_JPNU && c == 0x8e) | |
126 || (enc_utf8 && c >= 0xa0)) | |
127 chartab[c] |= CT_FNAME_CHAR; | |
128 #endif | |
129 } | |
130 | |
131 /* | |
132 * Init word char flags all to FALSE | |
133 */ | |
134 vim_memset(buf->b_chartab, 0, (size_t)32); | |
135 #ifdef FEAT_MBYTE | |
227 | 136 if (enc_dbcs != 0) |
137 for (c = 0; c < 256; ++c) | |
138 { | |
139 /* double-byte characters are probably word characters */ | |
140 if (MB_BYTE2LEN(c) == 2) | |
141 SET_CHARTAB(buf, c); | |
142 } | |
7 | 143 #endif |
144 | |
145 #ifdef FEAT_LISP | |
146 /* | |
147 * In lisp mode the '-' character is included in keywords. | |
148 */ | |
149 if (buf->b_p_lisp) | |
150 SET_CHARTAB(buf, '-'); | |
151 #endif | |
152 | |
153 /* Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint' | |
154 * options.  Each option is a list of characters, character numbers or | |
155 * ranges, separated by commas, e.g.: "200-210,x,#-178,-" | |
156 */ | |
/* When "global" is FALSE the loop starts at 3, so only 'iskeyword' is used. */
157 for (i = global ? 0 : 3; i <= 3; ++i) | |
158 { | |
159 if (i == 0) | |
160 p = p_isi; /* first round: 'isident' */ | |
161 else if (i == 1) | |
162 p = p_isp; /* second round: 'isprint' */ | |
163 else if (i == 2) | |
164 p = p_isf; /* third round: 'isfname' */ | |
165 else /* i == 3 */ | |
166 p = buf->b_p_isk; /* fourth round: 'iskeyword' */ | |
167 | |
168 while (*p) | |
169 { | |
170 tilde = FALSE; | |
171 do_isalpha = FALSE; | |
/* A '^' only acts as a prefix when something follows it. */
172 if (*p == '^' && p[1] != NUL) | |
173 { | |
174 tilde = TRUE; | |
175 ++p; | |
176 } | |
/* The item is either a decimal character number or a literal character. */
177 if (VIM_ISDIGIT(*p)) | |
178 c = getdigits(&p); | |
179 else | |
1955 | 180 #ifdef FEAT_MBYTE |
181 if (has_mbyte) | |
182 c = mb_ptr2char_adv(&p); | |
183 else | |
184 #endif | |
7 | 185 c = *p++; |
186 c2 = -1; | |
/* A '-' followed by something introduces the end of a range. */
187 if (*p == '-' && p[1] != NUL) | |
188 { | |
189 ++p; | |
190 if (VIM_ISDIGIT(*p)) | |
191 c2 = getdigits(&p); | |
192 else | |
1979 | 193 #ifdef FEAT_MBYTE |
194 if (has_mbyte) | |
195 c2 = mb_ptr2char_adv(&p); | |
196 else | |
197 #endif | |
7 | 198 c2 = *p++; |
199 } | |
/* Reject values outside 1-255, a reversed range and trailing junk. */
1979 | 200 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256 |
7 | 201 || !(*p == NUL || *p == ',')) |
202 return FAIL; | |
203 | |
204 if (c2 == -1) /* not a range */ | |
205 { | |
206 /* | |
207 * A single '@' (not "@-@"): | |
208 * Decide on letters being ID/printable/keyword chars with | |
209 * standard function isalpha(). This takes care of locale for | |
210 * single-byte characters. | |
211 */ | |
212 if (c == '@') | |
213 { | |
214 do_isalpha = TRUE; | |
215 c = 1; | |
216 c2 = 255; | |
217 } | |
218 else | |
219 c2 = c; | |
220 } | |
221 while (c <= c2) | |
222 { | |
1365 | 223 /* Use the MB_ functions here, because isalpha() doesn't |
224 * work properly when 'encoding' is "latin1" and the locale is | |
225 * "C". */ | |
226 if (!do_isalpha || MB_ISLOWER(c) || MB_ISUPPER(c) | |
7 | 227 #ifdef FEAT_FKMAP |
228 || (p_altkeymap && (F_isalpha(c) || F_isdigit(c))) | |
229 #endif | |
230 ) | |
231 { | |
232 if (i == 0) /* (re)set ID flag */ | |
233 { | |
234 if (tilde) | |
235 chartab[c] &= ~CT_ID_CHAR; | |
236 else | |
237 chartab[c] |= CT_ID_CHAR; | |
238 } | |
239 else if (i == 1) /* (re)set printable */ | |
240 { | |
/* Only characters outside <Space>-'~' can change (EBCDIC: only those
 * below <Space>), plus Farsi letters and digits with 'altkeymap'. */
241 if ((c < ' ' | |
242 #ifndef EBCDIC | |
243 || c > '~' | |
244 #endif | |
245 #ifdef FEAT_FKMAP | |
246 || (p_altkeymap | |
247 && (F_isalpha(c) || F_isdigit(c))) | |
248 #endif | |
249 ) | |
250 #ifdef FEAT_MBYTE | |
251 /* For double-byte we keep the cell width, so | |
252 * that we can detect it from the first byte. */ | |
253 && !(enc_dbcs && MB_BYTE2LEN(c) == 2) | |
254 #endif | |
255 ) | |
256 { | |
257 if (tilde) | |
258 { | |
259 chartab[c] = (chartab[c] & ~CT_CELL_MASK) | |
260 + ((dy_flags & DY_UHEX) ? 4 : 2); | |
261 chartab[c] &= ~CT_PRINT_CHAR; | |
262 } | |
263 else | |
264 { | |
265 chartab[c] = (chartab[c] & ~CT_CELL_MASK) + 1; | |
266 chartab[c] |= CT_PRINT_CHAR; | |
267 } | |
268 } | |
269 } | |
270 else if (i == 2) /* (re)set fname flag */ | |
271 { | |
272 if (tilde) | |
273 chartab[c] &= ~CT_FNAME_CHAR; | |
274 else | |
275 chartab[c] |= CT_FNAME_CHAR; | |
276 } | |
277 else /* i == 3 */ /* (re)set keyword flag */ | |
278 { | |
279 if (tilde) | |
280 RESET_CHARTAB(buf, c); | |
281 else | |
282 SET_CHARTAB(buf, c); | |
283 } | |
284 } | |
285 ++c; | |
286 } | |
287 p = skip_to_option_part(p); | |
288 } | |
289 } | |
/* From here on transchar() may consult chartab[]. */
290 chartab_initialized = TRUE; | |
291 return OK; | |
292 } | |
293 | |
294 /* | |
295 * Translate any special characters in buf[bufsize] in-place. | |
296 * The result is a string with only printable characters, but if there is not | |
297 * enough room, not all characters will be translated. | |
298 */ | |
299 void | |
300 trans_characters(buf, bufsize) | |
301 char_u *buf; | |
302 int bufsize; | |
303 { | |
304 int len; /* length of string needing translation */ | |
305 int room; /* room in buffer after string */ | |
306 char_u *trs; /* translated character */ | |
307 int trs_len; /* length of trs[] */ | |
308 | |
309 len = (int)STRLEN(buf); | |
310 room = bufsize - len; | |
311 while (*buf != 0) | |
312 { | |
313 # ifdef FEAT_MBYTE | |
314 /* Assume a multi-byte character doesn't need translation. */ | |
474 | 315 if (has_mbyte && (trs_len = (*mb_ptr2len)(buf)) > 1) |
7 | 316 len -= trs_len; |
317 else | |
318 # endif | |
319 { | |
320 trs = transchar_byte(*buf); | |
321 trs_len = (int)STRLEN(trs); | |
322 if (trs_len > 1) | |
323 { | |
324 room -= trs_len - 1; | |
325 if (room <= 0) | |
326 return; | |
327 mch_memmove(buf + trs_len, buf + 1, (size_t)len); | |
328 } | |
329 mch_memmove(buf, trs, (size_t)trs_len); | |
330 --len; | |
331 } | |
332 buf += trs_len; | |
333 } | |
334 } | |
335 | |
1097 | 336 #if defined(FEAT_EVAL) || defined(FEAT_TITLE) || defined(FEAT_INS_EXPAND) \ |
337 || defined(PROTO) | |
7 | 338 /* |
339 * Translate a string into allocated memory, replacing special chars with | |
340 * printable chars. Returns NULL when out of memory. | |
341 */ | |
342 char_u * | |
343 transstr(s) | |
344 char_u *s; | |
345 { | |
346 char_u *res; | |
347 char_u *p; | |
348 #ifdef FEAT_MBYTE | |
349 int l, len, c; | |
350 char_u hexbuf[11]; | |
351 #endif | |
352 | |
353 #ifdef FEAT_MBYTE | |
354 if (has_mbyte) | |
355 { | |
356 /* Compute the length of the result, taking account of unprintable | |
357 * multi-byte characters. */ | |
358 len = 0; | |
359 p = s; | |
360 while (*p != NUL) | |
361 { | |
474 | 362 if ((l = (*mb_ptr2len)(p)) > 1) |
7 | 363 { |
364 c = (*mb_ptr2char)(p); | |
365 p += l; | |
366 if (vim_isprintc(c)) | |
367 len += l; | |
368 else | |
369 { | |
370 transchar_hex(hexbuf, c); | |
835 | 371 len += (int)STRLEN(hexbuf); |
7 | 372 } |
373 } | |
374 else | |
375 { | |
376 l = byte2cells(*p++); | |
377 if (l > 0) | |
378 len += l; | |
379 else | |
380 len += 4; /* illegal byte sequence */ | |
381 } | |
382 } | |
383 res = alloc((unsigned)(len + 1)); | |
384 } | |
385 else | |
386 #endif | |
387 res = alloc((unsigned)(vim_strsize(s) + 1)); | |
388 if (res != NULL) | |
389 { | |
390 *res = NUL; | |
391 p = s; | |
392 while (*p != NUL) | |
393 { | |
394 #ifdef FEAT_MBYTE | |
474 | 395 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) |
7 | 396 { |
397 c = (*mb_ptr2char)(p); | |
398 if (vim_isprintc(c)) | |
399 STRNCAT(res, p, l); /* append printable multi-byte char */ | |
400 else | |
401 transchar_hex(res + STRLEN(res), c); | |
402 p += l; | |
403 } | |
404 else | |
405 #endif | |
406 STRCAT(res, transchar_byte(*p++)); | |
407 } | |
408 } | |
409 return res; | |
410 } | |
411 #endif | |
412 | |
413 #if defined(FEAT_SYN_HL) || defined(FEAT_INS_EXPAND) || defined(PROTO) | |
414 /* | |
221 | 415 * Convert the string "str[orglen]" to do ignore-case comparing. Uses the |
416 * current locale. | |
130 | 417 * When "buf" is NULL returns an allocated string (NULL for out-of-memory). |
418 * Otherwise puts the result in "buf[buflen]". | |
7 | 419 */ |
420 char_u * | |
130 | 421 str_foldcase(str, orglen, buf, buflen) |
7 | 422 char_u *str; |
130 | 423 int orglen; |
424 char_u *buf; | |
425 int buflen; | |
7 | 426 { |
427 garray_T ga; | |
428 int i; | |
130 | 429 int len = orglen; |
7 | 430 |
431 #define GA_CHAR(i) ((char_u *)ga.ga_data)[i] | |
432 #define GA_PTR(i) ((char_u *)ga.ga_data + i) | |
130 | 433 #define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i]) |
434 #define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + i) | |
7 | 435 |
130 | 436 /* Copy "str" into "buf" or allocated memory, unmodified. */ |
437 if (buf == NULL) | |
438 { | |
439 ga_init2(&ga, 1, 10); | |
440 if (ga_grow(&ga, len + 1) == FAIL) | |
441 return NULL; | |
442 mch_memmove(ga.ga_data, str, (size_t)len); | |
443 ga.ga_len = len; | |
444 } | |
445 else | |
446 { | |
447 if (len >= buflen) /* Ugly! */ | |
448 len = buflen - 1; | |
449 mch_memmove(buf, str, (size_t)len); | |
450 } | |
451 if (buf == NULL) | |
452 GA_CHAR(len) = NUL; | |
453 else | |
454 buf[len] = NUL; | |
7 | 455 |
456 /* Make each character lower case. */ | |
457 i = 0; | |
130 | 458 while (STR_CHAR(i) != NUL) |
7 | 459 { |
460 #ifdef FEAT_MBYTE | |
130 | 461 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1)) |
7 | 462 { |
463 if (enc_utf8) | |
464 { | |
1654 | 465 int c = utf_ptr2char(STR_PTR(i)); |
3263 | 466 int olen = utf_ptr2len(STR_PTR(i)); |
1654 | 467 int lc = utf_tolower(c); |
7 | 468 |
1654 | 469 /* Only replace the character when it is not an invalid |
470 * sequence (ASCII character or more than one byte) and | |
471 * utf_tolower() doesn't return the original character. */ | |
3263 | 472 if ((c < 0x80 || olen > 1) && c != lc) |
7 | 473 { |
3263 | 474 int nlen = utf_char2len(lc); |
7 | 475 |
476 /* If the byte length changes need to shift the following | |
477 * characters forward or backward. */ | |
3263 | 478 if (olen != nlen) |
7 | 479 { |
3263 | 480 if (nlen > olen) |
130 | 481 { |
3263 | 482 if (buf == NULL |
483 ? ga_grow(&ga, nlen - olen + 1) == FAIL | |
484 : len + nlen - olen >= buflen) | |
7 | 485 { |
486 /* out of memory, keep old char */ | |
487 lc = c; | |
3263 | 488 nlen = olen; |
7 | 489 } |
130 | 490 } |
3263 | 491 if (olen != nlen) |
7 | 492 { |
130 | 493 if (buf == NULL) |
494 { | |
3263 | 495 STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen); |
496 ga.ga_len += nlen - olen; | |
130 | 497 } |
498 else | |
499 { | |
3263 | 500 STRMOVE(buf + i + nlen, buf + i + olen); |
501 len += nlen - olen; | |
130 | 502 } |
7 | 503 } |
504 } | |
130 | 505 (void)utf_char2bytes(lc, STR_PTR(i)); |
7 | 506 } |
507 } | |
508 /* skip to next multi-byte char */ | |
474 | 509 i += (*mb_ptr2len)(STR_PTR(i)); |
7 | 510 } |
511 else | |
512 #endif | |
513 { | |
130 | 514 if (buf == NULL) |
515 GA_CHAR(i) = TOLOWER_LOC(GA_CHAR(i)); | |
516 else | |
517 buf[i] = TOLOWER_LOC(buf[i]); | |
7 | 518 ++i; |
519 } | |
520 } | |
521 | |
130 | 522 if (buf == NULL) |
523 return (char_u *)ga.ga_data; | |
524 return buf; | |
7 | 525 } |
526 #endif | |
527 | |
528 /* | |
529 * Catch 22: chartab[] can't be initialized before the options are | |
530 * initialized, and initializing options may cause transchar() to be called! | |
531 * When chartab_initialized == FALSE don't use chartab[]. | |
532 * Does NOT work for multi-byte characters, c must be <= 255. | |
533 * Also doesn't work for the first byte of a multi-byte, "c" must be a | |
534 * character! | |
535 */ | |
536 static char_u transchar_buf[7]; | |
537 | |
538 char_u * | |
539 transchar(c) | |
540 int c; | |
541 { | |
542 int i; | |
543 | |
544 i = 0; | |
545 if (IS_SPECIAL(c)) /* special key code, display as ~@ char */ | |
546 { | |
547 transchar_buf[0] = '~'; | |
548 transchar_buf[1] = '@'; | |
549 i = 2; | |
550 c = K_SECOND(c); | |
551 } | |
552 | |
553 if ((!chartab_initialized && ( | |
554 #ifdef EBCDIC | |
555 (c >= 64 && c < 255) | |
556 #else | |
557 (c >= ' ' && c <= '~') | |
558 #endif | |
559 #ifdef FEAT_FKMAP | |
560 || F_ischar(c) | |
561 #endif | |
562 )) || (c < 256 && vim_isprintc_strict(c))) | |
563 { | |
564 /* printable character */ | |
565 transchar_buf[i] = c; | |
566 transchar_buf[i + 1] = NUL; | |
567 } | |
568 else | |
569 transchar_nonprint(transchar_buf + i, c); | |
570 return transchar_buf; | |
571 } | |
572 | |
573 #if defined(FEAT_MBYTE) || defined(PROTO) | |
574 /* | |
575 * Like transchar(), but called with a byte instead of a character. Checks | |
576 * for an illegal UTF-8 byte. | |
577 */ | |
578 char_u * | |
579 transchar_byte(c) | |
580 int c; | |
581 { | |
582 if (enc_utf8 && c >= 0x80) | |
583 { | |
584 transchar_nonprint(transchar_buf, c); | |
585 return transchar_buf; | |
586 } | |
587 return transchar(c); | |
588 } | |
589 #endif | |
590 | |
591 /* | |
592 * Convert non-printable character to two or more printable characters in | |
593 * "buf[]". "buf" needs to be able to hold five bytes. | |
594 * Does NOT work for multi-byte characters, c must be <= 255. | |
595 */ | |
596 void | |
597 transchar_nonprint(buf, c) | |
598 char_u *buf; | |
599 int c; | |
600 { | |
601 if (c == NL) | |
602 c = NUL; /* we use newline in place of a NUL */ | |
603 else if (c == CAR && get_fileformat(curbuf) == EOL_MAC) | |
604 c = NL; /* we use CR in place of NL in this case */ | |
605 | |
606 if (dy_flags & DY_UHEX) /* 'display' has "uhex" */ | |
607 transchar_hex(buf, c); | |
608 | |
609 #ifdef EBCDIC | |
610 /* For EBCDIC only the characters 0-63 and 255 are not printable */ | |
611 else if (CtrlChar(c) != 0 || c == DEL) | |
612 #else | |
613 else if (c <= 0x7f) /* 0x00 - 0x1f and 0x7f */ | |
614 #endif | |
615 { | |
616 buf[0] = '^'; | |
617 #ifdef EBCDIC | |
618 if (c == DEL) | |
619 buf[1] = '?'; /* DEL displayed as ^? */ | |
620 else | |
621 buf[1] = CtrlChar(c); | |
622 #else | |
623 buf[1] = c ^ 0x40; /* DEL displayed as ^? */ | |
624 #endif | |
625 | |
626 buf[2] = NUL; | |
627 } | |
628 #ifdef FEAT_MBYTE | |
629 else if (enc_utf8 && c >= 0x80) | |
630 { | |
631 transchar_hex(buf, c); | |
632 } | |
633 #endif | |
634 #ifndef EBCDIC | |
635 else if (c >= ' ' + 0x80 && c <= '~' + 0x80) /* 0xa0 - 0xfe */ | |
636 { | |
637 buf[0] = '|'; | |
638 buf[1] = c - 0x80; | |
639 buf[2] = NUL; | |
640 } | |
641 #else | |
642 else if (c < 64) | |
643 { | |
644 buf[0] = '~'; | |
645 buf[1] = MetaChar(c); | |
646 buf[2] = NUL; | |
647 } | |
648 #endif | |
649 else /* 0x80 - 0x9f and 0xff */ | |
650 { | |
651 /* | |
652 * TODO: EBCDIC I don't know what to do with this chars, so I display | |
653 * them as '~?' for now | |
654 */ | |
655 buf[0] = '~'; | |
656 #ifdef EBCDIC | |
657 buf[1] = '?'; /* 0xff displayed as ~? */ | |
658 #else | |
659 buf[1] = (c - 0x80) ^ 0x40; /* 0xff displayed as ~? */ | |
660 #endif | |
661 buf[2] = NUL; | |
662 } | |
663 } | |
664 | |
665 void | |
666 transchar_hex(buf, c) | |
667 char_u *buf; | |
668 int c; | |
669 { | |
670 int i = 0; | |
671 | |
672 buf[0] = '<'; | |
673 #ifdef FEAT_MBYTE | |
674 if (c > 255) | |
675 { | |
676 buf[++i] = nr2hex((unsigned)c >> 12); | |
677 buf[++i] = nr2hex((unsigned)c >> 8); | |
678 } | |
679 #endif | |
680 buf[++i] = nr2hex((unsigned)c >> 4); | |
1869 | 681 buf[++i] = nr2hex((unsigned)c); |
7 | 682 buf[++i] = '>'; |
683 buf[++i] = NUL; | |
684 } | |
685 | |
/*
 * Return the hex digit for the lowest 4 bits of "c"; all higher bits are
 * ignored.
 * Lower case letters are used, so that <F1> is not confused with 0xf1 and
 * function key 1.
 */
    static unsigned
nr2hex(c)
    unsigned    c;
{
    unsigned    nibble = c & 0xf;

    return nibble <= 9 ? nibble + '0' : nibble - 10 + 'a';
}
699 | |
700 /* | |
701 * Return number of display cells occupied by byte "b". | |
702 * Caller must make sure 0 <= b <= 255. | |
703 * For multi-byte mode "b" must be the first byte of a character. | |
704 * A TAB is counted as two cells: "^I". | |
705 * For UTF-8 mode this will return 0 for bytes >= 0x80, because the number of | |
706 * cells depends on further bytes. | |
707 */ | |
708 int | |
709 byte2cells(b) | |
710 int b; | |
711 { | |
712 #ifdef FEAT_MBYTE | |
713 if (enc_utf8 && b >= 0x80) | |
714 return 0; | |
715 #endif | |
716 return (chartab[b] & CT_CELL_MASK); | |
717 } | |
718 | |
719 /* | |
720 * Return number of display cells occupied by character "c". | |
721 * "c" can be a special key (negative number) in which case 3 or 4 is returned. | |
722 * A TAB is counted as two cells: "^I" or four: "<09>". | |
723 */ | |
724 int | |
725 char2cells(c) | |
726 int c; | |
727 { | |
728 if (IS_SPECIAL(c)) | |
729 return char2cells(K_SECOND(c)) + 2; | |
730 #ifdef FEAT_MBYTE | |
731 if (c >= 0x80) | |
732 { | |
733 /* UTF-8: above 0x80 need to check the value */ | |
734 if (enc_utf8) | |
735 return utf_char2cells(c); | |
736 /* DBCS: double-byte means double-width, except for euc-jp with first | |
737 * byte 0x8e */ | |
738 if (enc_dbcs != 0 && c >= 0x100) | |
739 { | |
740 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e) | |
741 return 1; | |
742 return 2; | |
743 } | |
744 } | |
745 #endif | |
746 return (chartab[c & 0xff] & CT_CELL_MASK); | |
747 } | |
748 | |
749 /* | |
750 * Return number of display cells occupied by character at "*p". | |
751 * A TAB is counted as two cells: "^I" or four: "<09>". | |
752 */ | |
753 int | |
754 ptr2cells(p) | |
755 char_u *p; | |
756 { | |
757 #ifdef FEAT_MBYTE | |
758 /* For UTF-8 we need to look at more bytes if the first byte is >= 0x80. */ | |
759 if (enc_utf8 && *p >= 0x80) | |
760 return utf_ptr2cells(p); | |
761 /* For DBCS we can tell the cell count from the first byte. */ | |
762 #endif | |
763 return (chartab[*p] & CT_CELL_MASK); | |
764 } | |
765 | |
766 /* | |
3292 | 767 * Return the number of character cells string "s" will take on the screen, |
7 | 768 * counting TABs as two characters: "^I". |
769 */ | |
770 int | |
771 vim_strsize(s) | |
772 char_u *s; | |
773 { | |
774 return vim_strnsize(s, (int)MAXCOL); | |
775 } | |
776 | |
777 /* | |
3292 | 778 * Return the number of character cells string "s[len]" will take on the |
779 * screen, counting TABs as two characters: "^I". | |
7 | 780 */ |
781 int | |
782 vim_strnsize(s, len) | |
783 char_u *s; | |
784 int len; | |
785 { | |
786 int size = 0; | |
787 | |
788 while (*s != NUL && --len >= 0) | |
789 { | |
790 #ifdef FEAT_MBYTE | |
791 if (has_mbyte) | |
792 { | |
474 | 793 int l = (*mb_ptr2len)(s); |
7 | 794 |
795 size += ptr2cells(s); | |
796 s += l; | |
797 len -= l - 1; | |
798 } | |
799 else | |
800 #endif | |
801 size += byte2cells(*s++); | |
802 } | |
803 return size; | |
804 } | |
805 | |
/*
 * Return the number of characters 'c' will take on the screen, taking
 * into account the size of a tab.
 * Use a define to make it fast, this is used very often!!!
 * Also see getvcol() below.
 *
 * The macro expands to a complete if/else in which both branches return
 * from the function that uses it, thus it is written without a trailing
 * semicolon and must be the last statement on its path.
 * For a TAB (unless 'list' is set without a "tab" item in 'listchars') the
 * result is the distance from "col" to the next multiple of "buf"'s
 * b_p_ts, otherwise it is ptr2cells(p).
 * "col" is in parentheses so that an expression can be passed for it.
 * NOTE: b_p_ts is used as a divisor, presumably it is never zero -- verify
 * against the option checking code.
 */

#define RET_WIN_BUF_CHARTABSIZE(wp, buf, p, col) \
    if (*(p) == TAB && (!(wp)->w_p_list || lcs_tab1)) \
    { \
        int ts; \
        ts = (buf)->b_p_ts; \
        return (int)(ts - ((col) % ts)); \
    } \
    else \
        return ptr2cells(p);
822 | |
823 #if defined(FEAT_VREPLACE) || defined(FEAT_EX_EXTRA) || defined(FEAT_GUI) \ | |
824 || defined(FEAT_VIRTUALEDIT) || defined(PROTO) | |
/*
 * Return the number of screen cells for the character at "p" when it is
 * displayed at column "col", using the current window and buffer.
 * The whole body is RET_WIN_BUF_CHARTABSIZE, which contains the return
 * statements.
 */
825 int | |
826 chartabsize(p, col) | |
827 char_u *p; | |
828 colnr_T col; | |
829 { | |
830 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, p, col) | |
831 } | |
832 #endif | |
833 | |
834 #ifdef FEAT_LINEBREAK | |
/*
 * Like chartabsize(), but for window "wp" and its buffer wp->w_buffer
 * instead of the current window and buffer.
 * The whole body is RET_WIN_BUF_CHARTABSIZE, which contains the return
 * statements.
 */
835 static int | |
836 win_chartabsize(wp, p, col) | |
837 win_T *wp; | |
838 char_u *p; | |
839 colnr_T col; | |
840 { | |
841 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, p, col) | |
842 } | |
843 #endif | |
844 | |
845 /* | |
2339
01e4b4d37842
Added strdisplaywidth() function.
Bram Moolenaar <bram@vim.org>
parents:
2108
diff
changeset
|
846 * Return the number of characters the string 's' will take on the screen, |
01e4b4d37842
Added strdisplaywidth() function.
Bram Moolenaar <bram@vim.org>
parents:
2108
diff
changeset
|
847 * taking into account the size of a tab. |
7 | 848 */ |
849 int | |
850 linetabsize(s) | |
851 char_u *s; | |
852 { | |
2339
01e4b4d37842
Added strdisplaywidth() function.
Bram Moolenaar <bram@vim.org>
parents:
2108
diff
changeset
|
853 return linetabsize_col(0, s); |
01e4b4d37842
Added strdisplaywidth() function.
Bram Moolenaar <bram@vim.org>
parents:
2108
diff
changeset
|
854 } |
01e4b4d37842
Added strdisplaywidth() function.
Bram Moolenaar <bram@vim.org>
parents:
2108
diff
changeset
|
855 |
01e4b4d37842
Added strdisplaywidth() function.
Bram Moolenaar <bram@vim.org>
parents:
2108
diff
changeset
|
856 /* |
01e4b4d37842
Added strdisplaywidth() function.
Bram Moolenaar <bram@vim.org>
parents:
2108
diff
changeset
|
857 * Like linetabsize(), but starting at column "startcol". |
01e4b4d37842
Added strdisplaywidth() function.
Bram Moolenaar <bram@vim.org>
parents:
2108
diff
changeset
|
858 */ |
01e4b4d37842
Added strdisplaywidth() function.
Bram Moolenaar <bram@vim.org>
parents:
2108
diff
changeset
|
859 int |
01e4b4d37842
Added strdisplaywidth() function.
Bram Moolenaar <bram@vim.org>
parents:
2108
diff
changeset
|
860 linetabsize_col(startcol, s) |
01e4b4d37842
Added strdisplaywidth() function.
Bram Moolenaar <bram@vim.org>
parents:
2108
diff
changeset
|
861 int startcol; |
01e4b4d37842
Added strdisplaywidth() function.
Bram Moolenaar <bram@vim.org>
parents:
2108
diff
changeset
|
862 char_u *s; |
01e4b4d37842
Added strdisplaywidth() function.
Bram Moolenaar <bram@vim.org>
parents:
2108
diff
changeset
|
863 { |
01e4b4d37842
Added strdisplaywidth() function.
Bram Moolenaar <bram@vim.org>
parents:
2108
diff
changeset
|
864 colnr_T col = startcol; |
7 | 865 |
866 while (*s != NUL) | |
867 col += lbr_chartabsize_adv(&s, col); | |
868 return (int)col; | |
869 } | |
870 | |
871 /* | |
872 * Like linetabsize(), but for a given window instead of the current one. | |
873 */ | |
874 int | |
875 win_linetabsize(wp, p, len) | |
876 win_T *wp; | |
877 char_u *p; | |
878 colnr_T len; | |
879 { | |
880 colnr_T col = 0; | |
881 char_u *s; | |
882 | |
40 | 883 for (s = p; *s != NUL && (len == MAXCOL || s < p + len); mb_ptr_adv(s)) |
7 | 884 col += win_lbr_chartabsize(wp, s, col, NULL); |
885 return (int)col; | |
886 } | |
887 | |
888 /* | |
42 | 889 * Return TRUE if 'c' is a normal identifier character: |
890 * Letters and characters from the 'isident' option. | |
7 | 891 */ |
892 int | |
893 vim_isIDc(c) | |
894 int c; | |
895 { | |
896 return (c > 0 && c < 0x100 && (chartab[c] & CT_ID_CHAR)); | |
897 } | |
898 | |
899 /* | |
900 * return TRUE if 'c' is a keyword character: Letters and characters from | |
901 * 'iskeyword' option for current buffer. | |
902 * For multi-byte characters mb_get_class() is used (builtin rules). | |
903 */ | |
904 int | |
905 vim_iswordc(c) | |
906 int c; | |
907 { | |
908 #ifdef FEAT_MBYTE | |
909 if (c >= 0x100) | |
910 { | |
911 if (enc_dbcs != 0) | |
1869 | 912 return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2; |
7 | 913 if (enc_utf8) |
914 return utf_class(c) >= 2; | |
915 } | |
916 #endif | |
917 return (c > 0 && c < 0x100 && GET_CHARTAB(curbuf, c) != 0); | |
918 } | |
919 | |
920 /* | |
921 * Just like vim_iswordc() but uses a pointer to the (multi-byte) character. | |
922 */ | |
923 int | |
924 vim_iswordp(p) | |
925 char_u *p; | |
926 { | |
927 #ifdef FEAT_MBYTE | |
928 if (has_mbyte && MB_BYTE2LEN(*p) > 1) | |
929 return mb_get_class(p) >= 2; | |
930 #endif | |
931 return GET_CHARTAB(curbuf, *p) != 0; | |
932 } | |
933 | |
934 #if defined(FEAT_SYN_HL) || defined(PROTO) | |
935 int | |
936 vim_iswordc_buf(p, buf) | |
937 char_u *p; | |
938 buf_T *buf; | |
939 { | |
940 # ifdef FEAT_MBYTE | |
941 if (has_mbyte && MB_BYTE2LEN(*p) > 1) | |
942 return mb_get_class(p) >= 2; | |
943 # endif | |
944 return (GET_CHARTAB(buf, *p) != 0); | |
945 } | |
741 | 946 #endif |
7 | 947 |
948 /* | |
949 * return TRUE if 'c' is a valid file-name character | |
950 * Assume characters above 0x100 are valid (multi-byte). | |
951 */ | |
952 int | |
953 vim_isfilec(c) | |
954 int c; | |
955 { | |
956 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_FNAME_CHAR))); | |
957 } | |
958 | |
959 /* | |
1369 | 960 * return TRUE if 'c' is a valid file-name character or a wildcard character |
961 * Assume characters above 0x100 are valid (multi-byte). | |
962 * Explicitly interpret ']' as a wildcard character as mch_has_wildcard("]") | |
963 * returns false. | |
964 */ | |
965 int | |
966 vim_isfilec_or_wc(c) | |
967 int c; | |
968 { | |
969 char_u buf[2]; | |
970 | |
971 buf[0] = (char_u)c; | |
972 buf[1] = NUL; | |
973 return vim_isfilec(c) || c == ']' || mch_has_wildcard(buf); | |
974 } | |
975 | |
976 /* | |
7 | 977 * return TRUE if 'c' is a printable character |
978 * Assume characters above 0x100 are printable (multi-byte), except for | |
979 * Unicode. | |
980 */ | |
981 int | |
982 vim_isprintc(c) | |
983 int c; | |
984 { | |
985 #ifdef FEAT_MBYTE | |
986 if (enc_utf8 && c >= 0x100) | |
987 return utf_printable(c); | |
988 #endif | |
989 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR))); | |
990 } | |
991 | |
992 /* | |
993 * Strict version of vim_isprintc(c), don't return TRUE if "c" is the head | |
994 * byte of a double-byte character. | |
995 */ | |
996 int | |
997 vim_isprintc_strict(c) | |
998 int c; | |
999 { | |
1000 #ifdef FEAT_MBYTE | |
1001 if (enc_dbcs != 0 && c < 0x100 && MB_BYTE2LEN(c) > 1) | |
1002 return FALSE; | |
1003 if (enc_utf8 && c >= 0x100) | |
1004 return utf_printable(c); | |
1005 #endif | |
1006 return (c >= 0x100 || (c > 0 && (chartab[c] & CT_PRINT_CHAR))); | |
1007 } | |
1008 | |
1009 /* | |
1010 * like chartabsize(), but also check for line breaks on the screen | |
1011 */ | |
1012 int | |
1013 lbr_chartabsize(s, col) | |
1014 unsigned char *s; | |
1015 colnr_T col; | |
1016 { | |
1017 #ifdef FEAT_LINEBREAK | |
1018 if (!curwin->w_p_lbr && *p_sbr == NUL) | |
1019 { | |
1020 #endif | |
1021 #ifdef FEAT_MBYTE | |
1022 if (curwin->w_p_wrap) | |
1023 return win_nolbr_chartabsize(curwin, s, col, NULL); | |
1024 #endif | |
1025 RET_WIN_BUF_CHARTABSIZE(curwin, curbuf, s, col) | |
1026 #ifdef FEAT_LINEBREAK | |
1027 } | |
1028 return win_lbr_chartabsize(curwin, s, col, NULL); | |
1029 #endif | |
1030 } | |
1031 | |
1032 /* | |
1033 * Call lbr_chartabsize() and advance the pointer. | |
1034 */ | |
1035 int | |
1036 lbr_chartabsize_adv(s, col) | |
1037 char_u **s; | |
1038 colnr_T col; | |
1039 { | |
1040 int retval; | |
1041 | |
1042 retval = lbr_chartabsize(*s, col); | |
39 | 1043 mb_ptr_adv(*s); |
7 | 1044 return retval; |
1045 } | |
1046 | |
1047 /* | |
1048 * This function is used very often, keep it fast!!!! | |
1049 * | |
1050 * If "headp" not NULL, set *headp to the size of what we add for 'showbreak' | |
1051 * string at start of line. Warning: *headp is only set if it's a non-zero | |
1052 * value, init to 0 before calling. | |
1053 */ | |
1054 int | |
1055 win_lbr_chartabsize(wp, s, col, headp) | |
1056 win_T *wp; | |
1057 char_u *s; | |
1058 colnr_T col; | |
1876 | 1059 int *headp UNUSED; |
7 | 1060 { |
1061 #ifdef FEAT_LINEBREAK | |
1062 int c; | |
1063 int size; | |
1064 colnr_T col2; | |
1065 colnr_T colmax; | |
1066 int added; | |
1067 # ifdef FEAT_MBYTE | |
1068 int mb_added = 0; | |
1069 # else | |
1070 # define mb_added 0 | |
1071 # endif | |
1072 int numberextra; | |
1073 char_u *ps; | |
1074 int tab_corr = (*s == TAB); | |
236 | 1075 int n; |
7 | 1076 |
1077 /* | |
1078 * No 'linebreak' and 'showbreak': return quickly. | |
1079 */ | |
1080 if (!wp->w_p_lbr && *p_sbr == NUL) | |
1081 #endif | |
1082 { | |
1083 #ifdef FEAT_MBYTE | |
1084 if (wp->w_p_wrap) | |
1085 return win_nolbr_chartabsize(wp, s, col, headp); | |
1086 #endif | |
1087 RET_WIN_BUF_CHARTABSIZE(wp, wp->w_buffer, s, col) | |
1088 } | |
1089 | |
1090 #ifdef FEAT_LINEBREAK | |
1091 /* | |
1092 * First get normal size, without 'linebreak' | |
1093 */ | |
1094 size = win_chartabsize(wp, s, col); | |
1095 c = *s; | |
1096 | |
1097 /* | |
1098 * If 'linebreak' set check at a blank before a non-blank if the line | |
1099 * needs a break here | |
1100 */ | |
1101 if (wp->w_p_lbr | |
1102 && vim_isbreak(c) | |
1103 && !vim_isbreak(s[1]) | |
1104 && !wp->w_p_list | |
1105 && wp->w_p_wrap | |
1106 # ifdef FEAT_VERTSPLIT | |
1107 && wp->w_width != 0 | |
1108 # endif | |
1109 ) | |
1110 { | |
1111 /* | |
1112 * Count all characters from first non-blank after a blank up to next | |
1113 * non-blank after a blank. | |
1114 */ | |
1115 numberextra = win_col_off(wp); | |
1116 col2 = col; | |
1869 | 1117 colmax = (colnr_T)(W_WIDTH(wp) - numberextra); |
7 | 1118 if (col >= colmax) |
236 | 1119 { |
1120 n = colmax + win_col_off2(wp); | |
1121 if (n > 0) | |
1122 colmax += (((col - colmax) / n) + 1) * n; | |
1123 } | |
1124 | |
7 | 1125 for (;;) |
1126 { | |
1127 ps = s; | |
39 | 1128 mb_ptr_adv(s); |
7 | 1129 c = *s; |
1130 if (!(c != NUL | |
1131 && (vim_isbreak(c) | |
1132 || (!vim_isbreak(c) | |
1133 && (col2 == col || !vim_isbreak(*ps)))))) | |
1134 break; | |
1135 | |
1136 col2 += win_chartabsize(wp, s, col2); | |
1137 if (col2 >= colmax) /* doesn't fit */ | |
1138 { | |
1139 size = colmax - col; | |
1140 tab_corr = FALSE; | |
1141 break; | |
1142 } | |
1143 } | |
1144 } | |
1145 # ifdef FEAT_MBYTE | |
1146 else if (has_mbyte && size == 2 && MB_BYTE2LEN(*s) > 1 | |
1147 && wp->w_p_wrap && in_win_border(wp, col)) | |
1148 { | |
1149 ++size; /* Count the ">" in the last column. */ | |
1150 mb_added = 1; | |
1151 } | |
1152 # endif | |
1153 | |
1154 /* | |
1155 * May have to add something for 'showbreak' string at start of line | |
1156 * Set *headp to the size of what we add. | |
1157 */ | |
1158 added = 0; | |
1159 if (*p_sbr != NUL && wp->w_p_wrap && col != 0) | |
1160 { | |
1161 numberextra = win_col_off(wp); | |
1162 col += numberextra + mb_added; | |
1163 if (col >= (colnr_T)W_WIDTH(wp)) | |
1164 { | |
1165 col -= W_WIDTH(wp); | |
1166 numberextra = W_WIDTH(wp) - (numberextra - win_col_off2(wp)); | |
1167 if (numberextra > 0) | |
1168 col = col % numberextra; | |
1169 } | |
1170 if (col == 0 || col + size > (colnr_T)W_WIDTH(wp)) | |
1171 { | |
1172 added = vim_strsize(p_sbr); | |
1173 if (tab_corr) | |
1174 size += (added / wp->w_buffer->b_p_ts) * wp->w_buffer->b_p_ts; | |
1175 else | |
1176 size += added; | |
1177 if (col != 0) | |
1178 added = 0; | |
1179 } | |
1180 } | |
1181 if (headp != NULL) | |
1182 *headp = added + mb_added; | |
1183 return size; | |
1184 #endif | |
1185 } | |
1186 | |
1187 #if defined(FEAT_MBYTE) || defined(PROTO) | |
1188 /* | |
1189 * Like win_lbr_chartabsize(), except that we know 'linebreak' is off and | |
1190 * 'wrap' is on. This means we need to check for a double-byte character that | |
1191 * doesn't fit at the end of the screen line. | |
1192 */ | |
1193 static int | |
1194 win_nolbr_chartabsize(wp, s, col, headp) | |
1195 win_T *wp; | |
1196 char_u *s; | |
1197 colnr_T col; | |
1198 int *headp; | |
1199 { | |
1200 int n; | |
1201 | |
1202 if (*s == TAB && (!wp->w_p_list || lcs_tab1)) | |
1203 { | |
1204 n = wp->w_buffer->b_p_ts; | |
1205 return (int)(n - (col % n)); | |
1206 } | |
1207 n = ptr2cells(s); | |
1208 /* Add one cell for a double-width character in the last column of the | |
1209 * window, displayed with a ">". */ | |
1210 if (n == 2 && MB_BYTE2LEN(*s) > 1 && in_win_border(wp, col)) | |
1211 { | |
1212 if (headp != NULL) | |
1213 *headp = 1; | |
1214 return 3; | |
1215 } | |
1216 return n; | |
1217 } | |
1218 | |
1219 /* | |
1220 * Return TRUE if virtual column "vcol" is in the rightmost column of window | |
1221 * "wp". | |
1222 */ | |
1223 int | |
1224 in_win_border(wp, vcol) | |
1225 win_T *wp; | |
1226 colnr_T vcol; | |
1227 { | |
1869 | 1228 int width1; /* width of first line (after line number) */ |
1229 int width2; /* width of further lines */ | |
7 | 1230 |
1231 #ifdef FEAT_VERTSPLIT | |
1232 if (wp->w_width == 0) /* there is no border */ | |
1233 return FALSE; | |
1234 #endif | |
1235 width1 = W_WIDTH(wp) - win_col_off(wp); | |
1869 | 1236 if ((int)vcol < width1 - 1) |
7 | 1237 return FALSE; |
1869 | 1238 if ((int)vcol == width1 - 1) |
7 | 1239 return TRUE; |
1240 width2 = width1 + win_col_off2(wp); | |
1970 | 1241 if (width2 <= 0) |
1242 return FALSE; | |
7 | 1243 return ((vcol - width1) % width2 == width2 - 1); |
1244 } | |
1245 #endif /* FEAT_MBYTE */ | |
1246 | |
1247 /* | |
1248 * Get virtual column number of pos. | |
1249 * start: on the first position of this character (TAB, ctrl) | |
1250 * cursor: where the cursor is on this character (first char, except for TAB) | |
1251 * end: on the last position of this character (TAB, ctrl) | |
1252 * | |
1253 * This is used very often, keep it fast! | |
1254 */ | |
1255 void | |
1256 getvcol(wp, pos, start, cursor, end) | |
1257 win_T *wp; | |
1258 pos_T *pos; | |
1259 colnr_T *start; | |
1260 colnr_T *cursor; | |
1261 colnr_T *end; | |
1262 { | |
1263 colnr_T vcol; | |
1264 char_u *ptr; /* points to current char */ | |
1265 char_u *posptr; /* points to char at pos->col */ | |
1266 int incr; | |
1267 int head; | |
1268 int ts = wp->w_buffer->b_p_ts; | |
1269 int c; | |
1270 | |
1271 vcol = 0; | |
1272 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE); | |
2108
3cdf2a653e00
updated for version 7.2.391
Bram Moolenaar <bram@zimbu.org>
parents:
1979
diff
changeset
|
1273 if (pos->col == MAXCOL) |
3cdf2a653e00
updated for version 7.2.391
Bram Moolenaar <bram@zimbu.org>
parents:
1979
diff
changeset
|
1274 posptr = NULL; /* continue until the NUL */ |
3cdf2a653e00
updated for version 7.2.391
Bram Moolenaar <bram@zimbu.org>
parents:
1979
diff
changeset
|
1275 else |
3cdf2a653e00
updated for version 7.2.391
Bram Moolenaar <bram@zimbu.org>
parents:
1979
diff
changeset
|
1276 posptr = ptr + pos->col; |
7 | 1277 |
1278 /* | |
1279 * This function is used very often, do some speed optimizations. | |
1280 * When 'list', 'linebreak' and 'showbreak' are not set use a simple loop. | |
1281 * Also use this when 'list' is set but tabs take their normal size. | |
1282 */ | |
1283 if ((!wp->w_p_list || lcs_tab1 != NUL) | |
1284 #ifdef FEAT_LINEBREAK | |
1285 && !wp->w_p_lbr && *p_sbr == NUL | |
1286 #endif | |
1287 ) | |
1288 { | |
1289 #ifndef FEAT_MBYTE | |
1290 head = 0; | |
1291 #endif | |
1292 for (;;) | |
1293 { | |
1294 #ifdef FEAT_MBYTE | |
1295 head = 0; | |
1296 #endif | |
1297 c = *ptr; | |
1298 /* make sure we don't go past the end of the line */ | |
1299 if (c == NUL) | |
1300 { | |
1301 incr = 1; /* NUL at end of line only takes one column */ | |
1302 break; | |
1303 } | |
1304 /* A tab gets expanded, depending on the current column */ | |
1305 if (c == TAB) | |
1306 incr = ts - (vcol % ts); | |
1307 else | |
1308 { | |
1309 #ifdef FEAT_MBYTE | |
1310 if (has_mbyte) | |
1311 { | |
1312 /* For utf-8, if the byte is >= 0x80, need to look at | |
1313 * further bytes to find the cell width. */ | |
1314 if (enc_utf8 && c >= 0x80) | |
1315 incr = utf_ptr2cells(ptr); | |
1316 else | |
1317 incr = CHARSIZE(c); | |
1318 | |
1319 /* If a double-cell char doesn't fit at the end of a line | |
1320 * it wraps to the next line, it's like this char is three | |
1321 * cells wide. */ | |
1546 | 1322 if (incr == 2 && wp->w_p_wrap && MB_BYTE2LEN(*ptr) > 1 |
1323 && in_win_border(wp, vcol)) | |
7 | 1324 { |
1325 ++incr; | |
1326 head = 1; | |
1327 } | |
1328 } | |
1329 else | |
1330 #endif | |
1331 incr = CHARSIZE(c); | |
1332 } | |
1333 | |
2108
3cdf2a653e00
updated for version 7.2.391
Bram Moolenaar <bram@zimbu.org>
parents:
1979
diff
changeset
|
1334 if (posptr != NULL && ptr >= posptr) /* character at pos->col */ |
7 | 1335 break; |
1336 | |
1337 vcol += incr; | |
39 | 1338 mb_ptr_adv(ptr); |
7 | 1339 } |
1340 } | |
1341 else | |
1342 { | |
1343 for (;;) | |
1344 { | |
1345 /* A tab gets expanded, depending on the current column */ | |
1346 head = 0; | |
1347 incr = win_lbr_chartabsize(wp, ptr, vcol, &head); | |
1348 /* make sure we don't go past the end of the line */ | |
1349 if (*ptr == NUL) | |
1350 { | |
1351 incr = 1; /* NUL at end of line only takes one column */ | |
1352 break; | |
1353 } | |
1354 | |
2108
3cdf2a653e00
updated for version 7.2.391
Bram Moolenaar <bram@zimbu.org>
parents:
1979
diff
changeset
|
1355 if (posptr != NULL && ptr >= posptr) /* character at pos->col */ |
7 | 1356 break; |
1357 | |
1358 vcol += incr; | |
39 | 1359 mb_ptr_adv(ptr); |
7 | 1360 } |
1361 } | |
1362 if (start != NULL) | |
1363 *start = vcol + head; | |
1364 if (end != NULL) | |
1365 *end = vcol + incr - 1; | |
1366 if (cursor != NULL) | |
1367 { | |
1368 if (*ptr == TAB | |
1369 && (State & NORMAL) | |
1370 && !wp->w_p_list | |
1371 && !virtual_active() | |
1372 #ifdef FEAT_VISUAL | |
1373 && !(VIsual_active | |
1374 && (*p_sel == 'e' || ltoreq(*pos, VIsual))) | |
1375 #endif | |
1376 ) | |
1377 *cursor = vcol + incr - 1; /* cursor at end */ | |
1378 else | |
1379 *cursor = vcol + head; /* cursor at start */ | |
1380 } | |
1381 } | |
1382 | |
1383 /* | |
1384 * Get virtual cursor column in the current window, pretending 'list' is off. | |
1385 */ | |
1386 colnr_T | |
1387 getvcol_nolist(posp) | |
1388 pos_T *posp; | |
1389 { | |
1390 int list_save = curwin->w_p_list; | |
1391 colnr_T vcol; | |
1392 | |
1393 curwin->w_p_list = FALSE; | |
1394 getvcol(curwin, posp, NULL, &vcol, NULL); | |
1395 curwin->w_p_list = list_save; | |
1396 return vcol; | |
1397 } | |
1398 | |
1399 #if defined(FEAT_VIRTUALEDIT) || defined(PROTO) | |
1400 /* | |
1401 * Get virtual column in virtual mode. | |
1402 */ | |
1403 void | |
1404 getvvcol(wp, pos, start, cursor, end) | |
1405 win_T *wp; | |
1406 pos_T *pos; | |
1407 colnr_T *start; | |
1408 colnr_T *cursor; | |
1409 colnr_T *end; | |
1410 { | |
1411 colnr_T col; | |
1412 colnr_T coladd; | |
1413 colnr_T endadd; | |
1414 # ifdef FEAT_MBYTE | |
1415 char_u *ptr; | |
1416 # endif | |
1417 | |
1418 if (virtual_active()) | |
1419 { | |
1420 /* For virtual mode, only want one value */ | |
1421 getvcol(wp, pos, &col, NULL, NULL); | |
1422 | |
1423 coladd = pos->coladd; | |
1424 endadd = 0; | |
1425 # ifdef FEAT_MBYTE | |
1426 /* Cannot put the cursor on part of a wide character. */ | |
1427 ptr = ml_get_buf(wp->w_buffer, pos->lnum, FALSE); | |
1869 | 1428 if (pos->col < (colnr_T)STRLEN(ptr)) |
7 | 1429 { |
1430 int c = (*mb_ptr2char)(ptr + pos->col); | |
1431 | |
1432 if (c != TAB && vim_isprintc(c)) | |
1433 { | |
1869 | 1434 endadd = (colnr_T)(char2cells(c) - 1); |
557 | 1435 if (coladd > endadd) /* past end of line */ |
1436 endadd = 0; | |
7 | 1437 else |
1438 coladd = 0; | |
1439 } | |
1440 } | |
1441 # endif | |
1442 col += coladd; | |
1443 if (start != NULL) | |
1444 *start = col; | |
1445 if (cursor != NULL) | |
1446 *cursor = col; | |
1447 if (end != NULL) | |
1448 *end = col + endadd; | |
1449 } | |
1450 else | |
1451 getvcol(wp, pos, start, cursor, end); | |
1452 } | |
1453 #endif | |
1454 | |
1455 #if defined(FEAT_VISUAL) || defined(PROTO) | |
1456 /* | |
1457 * Get the leftmost and rightmost virtual column of pos1 and pos2. | |
1458 * Used for Visual block mode. | |
1459 */ | |
1460 void | |
1461 getvcols(wp, pos1, pos2, left, right) | |
1462 win_T *wp; | |
1463 pos_T *pos1, *pos2; | |
1464 colnr_T *left, *right; | |
1465 { | |
1466 colnr_T from1, from2, to1, to2; | |
1467 | |
1468 if (ltp(pos1, pos2)) | |
1469 { | |
1470 getvvcol(wp, pos1, &from1, NULL, &to1); | |
1471 getvvcol(wp, pos2, &from2, NULL, &to2); | |
1472 } | |
1473 else | |
1474 { | |
1475 getvvcol(wp, pos2, &from1, NULL, &to1); | |
1476 getvvcol(wp, pos1, &from2, NULL, &to2); | |
1477 } | |
1478 if (from2 < from1) | |
1479 *left = from2; | |
1480 else | |
1481 *left = from1; | |
1482 if (to2 > to1) | |
1483 { | |
1484 if (*p_sel == 'e' && from2 - 1 >= to1) | |
1485 *right = from2 - 1; | |
1486 else | |
1487 *right = to2; | |
1488 } | |
1489 else | |
1490 *right = to1; | |
1491 } | |
1492 #endif | |
1493 | |
1494 /* | |
1495 * skipwhite: skip over ' ' and '\t'. | |
1496 */ | |
1497 char_u * | |
1687 | 1498 skipwhite(q) |
1499 char_u *q; | |
7 | 1500 { |
1687 | 1501 char_u *p = q; |
1502 | |
7 | 1503 while (vim_iswhite(*p)) /* skip to next non-white */ |
1504 ++p; | |
1505 return p; | |
1506 } | |
1507 | |
1508 /* | |
293 | 1509 * skip over digits |
7 | 1510 */ |
1511 char_u * | |
1687 | 1512 skipdigits(q) |
1513 char_u *q; | |
7 | 1514 { |
1687 | 1515 char_u *p = q; |
1516 | |
7 | 1517 while (VIM_ISDIGIT(*p)) /* skip to next non-digit */ |
1518 ++p; | |
1519 return p; | |
1520 } | |
1521 | |
741 | 1522 #if defined(FEAT_SYN_HL) || defined(FEAT_SPELL) || defined(PROTO) |
301 | 1523 /* |
1524 * skip over digits and hex characters | |
1525 */ | |
1526 char_u * | |
1687 | 1527 skiphex(q) |
1528 char_u *q; | |
301 | 1529 { |
1687 | 1530 char_u *p = q; |
1531 | |
301 | 1532 while (vim_isxdigit(*p)) /* skip to next non-digit */ |
1533 ++p; | |
1534 return p; | |
1535 } | |
1536 #endif | |
1537 | |
293 | 1538 #if defined(FEAT_EX_EXTRA) || defined(PROTO) |
1539 /* | |
1540 * skip to digit (or NUL after the string) | |
1541 */ | |
1542 char_u * | |
1687 | 1543 skiptodigit(q) |
1544 char_u *q; | |
293 | 1545 { |
1687 | 1546 char_u *p = q; |
1547 | |
293 | 1548 while (*p != NUL && !VIM_ISDIGIT(*p)) /* skip to next digit */ |
1549 ++p; | |
1550 return p; | |
1551 } | |
1552 | |
1553 /* | |
1554 * skip to hex character (or NUL after the string) | |
1555 */ | |
1556 char_u * | |
1687 | 1557 skiptohex(q) |
1558 char_u *q; | |
293 | 1559 { |
1687 | 1560 char_u *p = q; |
1561 | |
293 | 1562 while (*p != NUL && !vim_isxdigit(*p)) /* skip to next digit */ |
1563 ++p; | |
1564 return p; | |
1565 } | |
1566 #endif | |
1567 | |
/*
 * Variant of isdigit() that can handle characters > 0x100.
 * isdigit() is not used, because on some systems it also considers
 * superscript 1 to be a digit.
 * Use the VIM_ISDIGIT() macro for simple arguments.
 */
    int
vim_isdigit(c)
    int         c;
{
    if (c < '0')
        return 0;
    return (c <= '9');
}
1580 | |
/*
 * Variant of isxdigit() that can handle characters > 0x100.
 * isxdigit() is not used, because on some systems it also considers
 * superscript 1 to be a digit.
 */
    int
vim_isxdigit(c)
    int         c;
{
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'a' && c <= 'f')
        return 1;
    return (c >= 'A' && c <= 'F');
}
1594 | |
492 | 1595 #if defined(FEAT_MBYTE) || defined(PROTO) |
1596 /* | |
1597 * Vim's own character class functions. These exist because many library | |
1598 * islower()/toupper() etc. do not work properly: they crash when used with | |
1599 * invalid values or can't handle latin1 when the locale is C. | |
1600 * Speed is most important here. | |
1601 */ | |
1602 #define LATIN1LOWER 'l' | |
1603 #define LATIN1UPPER 'U' | |
1604 | |
497 | 1605 static char_u latin1flags[257] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll"; |
3533 | 1606 static char_u latin1upper[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xf7\xd8\xd9\xda\xdb\xdc\xdd\xde\xff"; |
1607 static char_u latin1lower[257] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xd7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; | |
492 | 1608 |
1609 int | |
1610 vim_islower(c) | |
1611 int c; | |
1612 { | |
1613 if (c <= '@') | |
1614 return FALSE; | |
1615 if (c >= 0x80) | |
1616 { | |
1617 if (enc_utf8) | |
1618 return utf_islower(c); | |
1619 if (c >= 0x100) | |
1620 { | |
1621 #ifdef HAVE_ISWLOWER | |
1622 if (has_mbyte) | |
1623 return iswlower(c); | |
1624 #endif | |
1625 /* islower() can't handle these chars and may crash */ | |
1626 return FALSE; | |
1627 } | |
1628 if (enc_latin1like) | |
1629 return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER; | |
1630 } | |
1631 return islower(c); | |
1632 } | |
1633 | |
1634 int | |
1635 vim_isupper(c) | |
1636 int c; | |
1637 { | |
1638 if (c <= '@') | |
1639 return FALSE; | |
1640 if (c >= 0x80) | |
1641 { | |
1642 if (enc_utf8) | |
1643 return utf_isupper(c); | |
1644 if (c >= 0x100) | |
1645 { | |
1646 #ifdef HAVE_ISWUPPER | |
1647 if (has_mbyte) | |
1648 return iswupper(c); | |
1649 #endif | |
1650 /* islower() can't handle these chars and may crash */ | |
1651 return FALSE; | |
1652 } | |
1653 if (enc_latin1like) | |
1654 return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER; | |
1655 } | |
1656 return isupper(c); | |
1657 } | |
1658 | |
1659 int | |
1660 vim_toupper(c) | |
1661 int c; | |
1662 { | |
1663 if (c <= '@') | |
1664 return c; | |
1665 if (c >= 0x80) | |
1666 { | |
1667 if (enc_utf8) | |
1668 return utf_toupper(c); | |
1669 if (c >= 0x100) | |
1670 { | |
1671 #ifdef HAVE_TOWUPPER | |
1672 if (has_mbyte) | |
1673 return towupper(c); | |
1674 #endif | |
1675 /* toupper() can't handle these chars and may crash */ | |
1676 return c; | |
1677 } | |
1678 if (enc_latin1like) | |
1679 return latin1upper[c]; | |
1680 } | |
1681 return TOUPPER_LOC(c); | |
1682 } | |
1683 | |
1684 int | |
1685 vim_tolower(c) | |
1686 int c; | |
1687 { | |
1688 if (c <= '@') | |
1689 return c; | |
1690 if (c >= 0x80) | |
1691 { | |
1692 if (enc_utf8) | |
1693 return utf_tolower(c); | |
1694 if (c >= 0x100) | |
1695 { | |
1696 #ifdef HAVE_TOWLOWER | |
1697 if (has_mbyte) | |
1698 return towlower(c); | |
1699 #endif | |
1700 /* tolower() can't handle these chars and may crash */ | |
1701 return c; | |
1702 } | |
1703 if (enc_latin1like) | |
1704 return latin1lower[c]; | |
1705 } | |
1706 return TOLOWER_LOC(c); | |
1707 } | |
1708 #endif | |
1709 | |
7 | 1710 /* |
1711 * skiptowhite: skip over text until ' ' or '\t' or NUL. | |
1712 */ | |
1713 char_u * | |
1714 skiptowhite(p) | |
1715 char_u *p; | |
1716 { | |
1717 while (*p != ' ' && *p != '\t' && *p != NUL) | |
1718 ++p; | |
1719 return p; | |
1720 } | |
1721 | |
1722 #if defined(FEAT_LISTCMDS) || defined(FEAT_SIGNS) || defined(FEAT_SNIFF) \ | |
1723 || defined(PROTO) | |
1724 /* | |
1725 * skiptowhite_esc: Like skiptowhite(), but also skip escaped chars | |
1726 */ | |
1727 char_u * | |
1728 skiptowhite_esc(p) | |
1729 char_u *p; | |
1730 { | |
1731 while (*p != ' ' && *p != '\t' && *p != NUL) | |
1732 { | |
1733 if ((*p == '\\' || *p == Ctrl_V) && *(p + 1) != NUL) | |
1734 ++p; | |
1735 ++p; | |
1736 } | |
1737 return p; | |
1738 } | |
1739 #endif | |
1740 | |
1741 /* | |
1742 * Getdigits: Get a number from a string and skip over it. | |
1743 * Note: the argument is a pointer to a char_u pointer! | |
1744 */ | |
1745 long | |
1746 getdigits(pp) | |
1747 char_u **pp; | |
1748 { | |
1749 char_u *p; | |
1750 long retval; | |
1751 | |
1752 p = *pp; | |
1753 retval = atol((char *)p); | |
1754 if (*p == '-') /* skip negative sign */ | |
1755 ++p; | |
1756 p = skipdigits(p); /* skip to next non-digit */ | |
1757 *pp = p; | |
1758 return retval; | |
1759 } | |
1760 | |
1761 /* | |
1762 * Return TRUE if "lbuf" is empty or only contains blanks. | |
1763 */ | |
1764 int | |
1765 vim_isblankline(lbuf) | |
1766 char_u *lbuf; | |
1767 { | |
1768 char_u *p; | |
1769 | |
1770 p = skipwhite(lbuf); | |
1771 return (*p == NUL || *p == '\r' || *p == '\n'); | |
1772 } | |
1773 | |
1774 /* | |
1775 * Convert a string into a long and/or unsigned long, taking care of | |
16 | 1776 * hexadecimal and octal numbers. Accepts a '-' sign. |
7 | 1777 * If "hexp" is not NULL, returns a flag to indicate the type of the number: |
1778 * 0 decimal | |
1779 * '0' octal | |
1780 * 'X' hex | |
1781 * 'x' hex | |
1782 * If "len" is not NULL, the length of the number in characters is returned. | |
1783 * If "nptr" is not NULL, the signed result is returned in it. | |
1784 * If "unptr" is not NULL, the unsigned result is returned in it. | |
293 | 1785 * If "dooct" is non-zero recognize octal numbers, when > 1 always assume |
1786 * octal number. | |
782 | 1787 * If "dohex" is non-zero recognize hex numbers, when > 1 always assume |
293 | 1788 * hex number. |
7 | 1789 */ |
1790 void | |
1791 vim_str2nr(start, hexp, len, dooct, dohex, nptr, unptr) | |
1792 char_u *start; | |
1793 int *hexp; /* return: type of number 0 = decimal, 'x' | |
1794 or 'X' is hex, '0' = octal */ | |
1795 int *len; /* return: detected length of number */ | |
1796 int dooct; /* recognize octal number */ | |
1797 int dohex; /* recognize hex number */ | |
1798 long *nptr; /* return: signed result */ | |
1799 unsigned long *unptr; /* return: unsigned result */ | |
1800 { | |
1801 char_u *ptr = start; | |
1802 int hex = 0; /* default is decimal */ | |
1803 int negative = FALSE; | |
1804 unsigned long un = 0; | |
39 | 1805 int n; |
7 | 1806 |
1807 if (ptr[0] == '-') | |
1808 { | |
1809 negative = TRUE; | |
1810 ++ptr; | |
1811 } | |
1812 | |
39 | 1813 /* Recognize hex and octal. */ |
1814 if (ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9') | |
7 | 1815 { |
1816 hex = ptr[1]; | |
1817 if (dohex && (hex == 'X' || hex == 'x') && vim_isxdigit(ptr[2])) | |
1818 ptr += 2; /* hexadecimal */ | |
1819 else | |
1820 { | |
39 | 1821 hex = 0; /* default is decimal */ |
1822 if (dooct) | |
1823 { | |
1824 /* Don't interpret "0", "08" or "0129" as octal. */ | |
1825 for (n = 1; VIM_ISDIGIT(ptr[n]); ++n) | |
1826 { | |
1827 if (ptr[n] > '7') | |
1828 { | |
1829 hex = 0; /* can't be octal */ | |
1830 break; | |
1831 } | |
3292 | 1832 if (ptr[n] >= '0') |
39 | 1833 hex = '0'; /* assume octal */ |
1834 } | |
1835 } | |
7 | 1836 } |
1837 } | |
1838 | |
1839 /* | |
1840 * Do the string-to-numeric conversion "manually" to avoid sscanf quirks. | |
1841 */ | |
293 | 1842 if (hex == '0' || dooct > 1) |
7 | 1843 { |
293 | 1844 /* octal */ |
1845 while ('0' <= *ptr && *ptr <= '7') | |
7 | 1846 { |
293 | 1847 un = 8 * un + (unsigned long)(*ptr - '0'); |
1848 ++ptr; | |
7 | 1849 } |
293 | 1850 } |
1851 else if (hex != 0 || dohex > 1) | |
1852 { | |
1853 /* hex */ | |
1854 while (vim_isxdigit(*ptr)) | |
7 | 1855 { |
293 | 1856 un = 16 * un + (unsigned long)hex2nr(*ptr); |
1857 ++ptr; | |
7 | 1858 } |
1859 } | |
1860 else | |
1861 { | |
1862 /* decimal */ | |
1863 while (VIM_ISDIGIT(*ptr)) | |
1864 { | |
1865 un = 10 * un + (unsigned long)(*ptr - '0'); | |
1866 ++ptr; | |
1867 } | |
1868 } | |
1869 | |
1870 if (hexp != NULL) | |
1871 *hexp = hex; | |
1872 if (len != NULL) | |
1873 *len = (int)(ptr - start); | |
1874 if (nptr != NULL) | |
16 | 1875 { |
1876 if (negative) /* account for leading '-' for decimal numbers */ | |
1877 *nptr = -(long)un; | |
1878 else | |
1879 *nptr = (long)un; | |
1880 } | |
7 | 1881 if (unptr != NULL) |
1882 *unptr = un; | |
1883 } | |
1884 | |
/*
 * Return the numeric value of the hex digit "c".
 * The result is only meaningful when "c" is '0' - '9', 'A' - 'F' or
 * 'a' - 'f'.
 */
    int
hex2nr(c)
    int         c;
{
    if ('a' <= c && c <= 'f')
        return 10 + (c - 'a');
    if ('A' <= c && c <= 'F')
        return 10 + (c - 'A');
    return c - '0';
}
1899 | |
1900 #if defined(FEAT_TERMRESPONSE) \ | |
1901 || (defined(FEAT_GUI_GTK) && defined(FEAT_WINDOWS)) || defined(PROTO) | |
1902 /* | |
1903 * Convert two hex characters to a byte. | |
1904 * Return -1 if one of the characters is not hex. | |
1905 */ | |
1906 int | |
1907 hexhex2nr(p) | |
1908 char_u *p; | |
1909 { | |
1910 if (!vim_isxdigit(p[0]) || !vim_isxdigit(p[1])) | |
1911 return -1; | |
1912 return (hex2nr(p[0]) << 4) + hex2nr(p[1]); | |
1913 } | |
1914 #endif | |
1915 | |
1916 /* | |
1917 * Return TRUE if "str" starts with a backslash that should be removed. | |
1918 * For MS-DOS, WIN32 and OS/2 this is only done when the character after the | |
1919 * backslash is not a normal file name character. | |
1920 * '$' is a valid file name character, we don't remove the backslash before | |
1921 * it. This means it is not possible to use an environment variable after a | |
1922 * backslash. "C:\$VIM\doc" is taken literally, only "$VIM\doc" works. | |
1923 * Although "\ name" is valid, the backslash in "Program\ files" must be | |
1924 * removed. Assume a file name doesn't start with a space. | |
1925 * For multi-byte names, never remove a backslash before a non-ascii | |
1926 * character, assume that all multi-byte characters are valid file name | |
1927 * characters. | |
1928 */ | |
1929 int | |
1930 rem_backslash(str) | |
1931 char_u *str; | |
1932 { | |
1933 #ifdef BACKSLASH_IN_FILENAME | |
1934 return (str[0] == '\\' | |
1935 # ifdef FEAT_MBYTE | |
1936 && str[1] < 0x80 | |
1937 # endif | |
1938 && (str[1] == ' ' | |
1939 || (str[1] != NUL | |
1940 && str[1] != '*' | |
1941 && str[1] != '?' | |
1942 && !vim_isfilec(str[1])))); | |
1943 #else | |
1944 return (str[0] == '\\' && str[1] != NUL); | |
1945 #endif | |
1946 } | |
1947 | |
1948 /* | |
1949 * Halve the number of backslashes in a file name argument. | |
1950 * For MS-DOS we only do this if the character after the backslash | |
1951 * is not a normal file character. | |
1952 */ | |
1953 void | |
1954 backslash_halve(p) | |
1955 char_u *p; | |
1956 { | |
1957 for ( ; *p; ++p) | |
1958 if (rem_backslash(p)) | |
1621 | 1959 STRMOVE(p, p + 1); |
7 | 1960 } |
1961 | |
1962 /* | |
1963 * backslash_halve() plus save the result in allocated memory. | |
1964 */ | |
1965 char_u * | |
1966 backslash_halve_save(p) | |
1967 char_u *p; | |
1968 { | |
1969 char_u *res; | |
1970 | |
1971 res = vim_strsave(p); | |
1972 if (res == NULL) | |
1973 return p; | |
1974 backslash_halve(res); | |
1975 return res; | |
1976 } | |
1977 | |
1978 #if (defined(EBCDIC) && defined(FEAT_POSTSCRIPT)) || defined(PROTO) | |
1979 /* | |
1980 * Table for EBCDIC to ASCII conversion unashamedly taken from xxd.c! | |
1981 * The first 64 entries have been added to map control characters defined in | |
1982 * ascii.h | |
1983 */ | |
1984 static char_u ebcdic2ascii_tab[256] = | |
1985 { | |
1986 0000, 0001, 0002, 0003, 0004, 0011, 0006, 0177, | |
1987 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017, | |
1988 0020, 0021, 0022, 0023, 0024, 0012, 0010, 0027, | |
1989 0030, 0031, 0032, 0033, 0033, 0035, 0036, 0037, | |
1990 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047, | |
1991 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057, | |
1992 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067, | |
1993 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077, | |
1994 0040, 0240, 0241, 0242, 0243, 0244, 0245, 0246, | |
1995 0247, 0250, 0325, 0056, 0074, 0050, 0053, 0174, | |
1996 0046, 0251, 0252, 0253, 0254, 0255, 0256, 0257, | |
1997 0260, 0261, 0041, 0044, 0052, 0051, 0073, 0176, | |
1998 0055, 0057, 0262, 0263, 0264, 0265, 0266, 0267, | |
1999 0270, 0271, 0313, 0054, 0045, 0137, 0076, 0077, | |
2000 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301, | |
2001 0302, 0140, 0072, 0043, 0100, 0047, 0075, 0042, | |
2002 0303, 0141, 0142, 0143, 0144, 0145, 0146, 0147, | |
2003 0150, 0151, 0304, 0305, 0306, 0307, 0310, 0311, | |
2004 0312, 0152, 0153, 0154, 0155, 0156, 0157, 0160, | |
2005 0161, 0162, 0136, 0314, 0315, 0316, 0317, 0320, | |
2006 0321, 0345, 0163, 0164, 0165, 0166, 0167, 0170, | |
2007 0171, 0172, 0322, 0323, 0324, 0133, 0326, 0327, | |
2008 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, | |
2009 0340, 0341, 0342, 0343, 0344, 0135, 0346, 0347, | |
2010 0173, 0101, 0102, 0103, 0104, 0105, 0106, 0107, | |
2011 0110, 0111, 0350, 0351, 0352, 0353, 0354, 0355, | |
2012 0175, 0112, 0113, 0114, 0115, 0116, 0117, 0120, | |
2013 0121, 0122, 0356, 0357, 0360, 0361, 0362, 0363, | |
2014 0134, 0237, 0123, 0124, 0125, 0126, 0127, 0130, | |
2015 0131, 0132, 0364, 0365, 0366, 0367, 0370, 0371, | |
2016 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067, | |
2017 0070, 0071, 0372, 0373, 0374, 0375, 0376, 0377 | |
2018 }; | |
2019 | |
2020 /* | |
2021 * Convert a buffer worth of characters from EBCDIC to ASCII. Only useful if | |
2022 * wanting 7-bit ASCII characters out the other end. | |
2023 */ | |
2024 void | |
2025 ebcdic2ascii(buffer, len) | |
2026 char_u *buffer; | |
2027 int len; | |
2028 { | |
2029 int i; | |
2030 | |
2031 for (i = 0; i < len; i++) | |
2032 buffer[i] = ebcdic2ascii_tab[buffer[i]]; | |
2033 } | |
2034 #endif |