Mercurial > vim
comparison src/mbyte.c @ 1903:4c02214d1465 v7.2.200
updated for version 7.2-200
author | vimboss |
---|---|
date | Tue, 16 Jun 2009 13:12:07 +0000 |
parents | c8f343a465a2 |
children | 062104a823df |
comparison
equal
deleted
inserted
replaced
1902:5d1ca05fa8ff | 1903:4c02214d1465 |
---|---|
125 | 125 |
126 static int enc_canon_search __ARGS((char_u *name)); | 126 static int enc_canon_search __ARGS((char_u *name)); |
127 static int dbcs_char2len __ARGS((int c)); | 127 static int dbcs_char2len __ARGS((int c)); |
128 static int dbcs_char2bytes __ARGS((int c, char_u *buf)); | 128 static int dbcs_char2bytes __ARGS((int c, char_u *buf)); |
129 static int dbcs_ptr2len __ARGS((char_u *p)); | 129 static int dbcs_ptr2len __ARGS((char_u *p)); |
130 static int dbcs_ptr2len_len __ARGS((char_u *p, int size)); | |
131 static int utf_ptr2cells_len __ARGS((char_u *p, int size)); | |
130 static int dbcs_char2cells __ARGS((int c)); | 132 static int dbcs_char2cells __ARGS((int c)); |
133 static int dbcs_ptr2cells_len __ARGS((char_u *p, int size)); | |
131 static int dbcs_ptr2char __ARGS((char_u *p)); | 134 static int dbcs_ptr2char __ARGS((char_u *p)); |
132 | 135 |
133 /* Lookup table to quickly get the length in bytes of a UTF-8 character from | 136 /* Lookup table to quickly get the length in bytes of a UTF-8 character from |
134 * the first byte of a UTF-8 string. Bytes which are illegal when used as the | 137 * the first byte of a UTF-8 string. Bytes which are illegal when used as the |
135 * first byte have a one, because these will be used separately. */ | 138 * first byte have a one, because these will be used separately. */ |
604 * Set the function pointers. | 607 * Set the function pointers. |
605 */ | 608 */ |
606 if (enc_utf8) | 609 if (enc_utf8) |
607 { | 610 { |
608 mb_ptr2len = utfc_ptr2len; | 611 mb_ptr2len = utfc_ptr2len; |
612 mb_ptr2len_len = utfc_ptr2len_len; | |
609 mb_char2len = utf_char2len; | 613 mb_char2len = utf_char2len; |
610 mb_char2bytes = utf_char2bytes; | 614 mb_char2bytes = utf_char2bytes; |
611 mb_ptr2cells = utf_ptr2cells; | 615 mb_ptr2cells = utf_ptr2cells; |
616 mb_ptr2cells_len = utf_ptr2cells_len; | |
612 mb_char2cells = utf_char2cells; | 617 mb_char2cells = utf_char2cells; |
613 mb_off2cells = utf_off2cells; | 618 mb_off2cells = utf_off2cells; |
614 mb_ptr2char = utf_ptr2char; | 619 mb_ptr2char = utf_ptr2char; |
615 mb_head_off = utf_head_off; | 620 mb_head_off = utf_head_off; |
616 } | 621 } |
617 else if (enc_dbcs != 0) | 622 else if (enc_dbcs != 0) |
618 { | 623 { |
619 mb_ptr2len = dbcs_ptr2len; | 624 mb_ptr2len = dbcs_ptr2len; |
625 mb_ptr2len_len = dbcs_ptr2len_len; | |
620 mb_char2len = dbcs_char2len; | 626 mb_char2len = dbcs_char2len; |
621 mb_char2bytes = dbcs_char2bytes; | 627 mb_char2bytes = dbcs_char2bytes; |
622 mb_ptr2cells = dbcs_ptr2cells; | 628 mb_ptr2cells = dbcs_ptr2cells; |
629 mb_ptr2cells_len = dbcs_ptr2cells_len; | |
623 mb_char2cells = dbcs_char2cells; | 630 mb_char2cells = dbcs_char2cells; |
624 mb_off2cells = dbcs_off2cells; | 631 mb_off2cells = dbcs_off2cells; |
625 mb_ptr2char = dbcs_ptr2char; | 632 mb_ptr2char = dbcs_ptr2char; |
626 mb_head_off = dbcs_head_off; | 633 mb_head_off = dbcs_head_off; |
627 } | 634 } |
628 else | 635 else |
629 { | 636 { |
630 mb_ptr2len = latin_ptr2len; | 637 mb_ptr2len = latin_ptr2len; |
638 mb_ptr2len_len = latin_ptr2len_len; | |
631 mb_char2len = latin_char2len; | 639 mb_char2len = latin_char2len; |
632 mb_char2bytes = latin_char2bytes; | 640 mb_char2bytes = latin_char2bytes; |
633 mb_ptr2cells = latin_ptr2cells; | 641 mb_ptr2cells = latin_ptr2cells; |
642 mb_ptr2cells_len = latin_ptr2cells_len; | |
634 mb_char2cells = latin_char2cells; | 643 mb_char2cells = latin_char2cells; |
635 mb_off2cells = latin_off2cells; | 644 mb_off2cells = latin_off2cells; |
636 mb_ptr2char = latin_ptr2char; | 645 mb_ptr2char = latin_ptr2char; |
637 mb_head_off = latin_head_off; | 646 mb_head_off = latin_head_off; |
638 } | 647 } |
1067 /* | 1076 /* |
1068 * mb_ptr2len() function pointer. | 1077 * mb_ptr2len() function pointer. |
1069 * Get byte length of character at "*p" but stop at a NUL. | 1078 * Get byte length of character at "*p" but stop at a NUL. |
1070 * For UTF-8 this includes following composing characters. | 1079 * For UTF-8 this includes following composing characters. |
1071 * Returns 0 when *p is NUL. | 1080 * Returns 0 when *p is NUL. |
1072 * | |
1073 */ | 1081 */ |
1074 int | 1082 int |
1075 latin_ptr2len(p) | 1083 latin_ptr2len(p) |
1076 char_u *p; | 1084 char_u *p; |
1077 { | 1085 { |
1083 char_u *p; | 1091 char_u *p; |
1084 { | 1092 { |
1085 int len; | 1093 int len; |
1086 | 1094 |
1087 /* Check if second byte is not missing. */ | 1095 /* Check if second byte is not missing. */ |
1096 len = MB_BYTE2LEN(*p); | |
1097 if (len == 2 && p[1] == NUL) | |
1098 len = 1; | |
1099 return len; | |
1100 } | |
1101 | |
1102 /* | |
1103 * mb_ptr2len_len() function pointer. | |
1104 * Like mb_ptr2len(), but limit to read "size" bytes. | |
1105 * Returns 0 for an empty string. | |
1106 * Returns 1 for an illegal char or an incomplete byte sequence. | |
1107 */ | |
1108 int | |
1109 latin_ptr2len_len(p, size) | |
1110 char_u *p; | |
1111 int size; | |
1112 { | |
1113 if (size < 1 || *p == NUL) | |
1114 return 0; | |
1115 return 1; | |
1116 } | |
1117 | |
1118 static int | |
1119 dbcs_ptr2len_len(p, size) | |
1120 char_u *p; | |
1121 int size; | |
1122 { | |
1123 int len; | |
1124 | |
1125 if (size < 1 || *p == NUL) | |
1126 return 0; | |
1127 if (size == 1) | |
1128 return 1; | |
1129 /* Check that second byte is not missing. */ | |
1088 len = MB_BYTE2LEN(*p); | 1130 len = MB_BYTE2LEN(*p); |
1089 if (len == 2 && p[1] == NUL) | 1131 if (len == 2 && p[1] == NUL) |
1090 len = 1; | 1132 len = 1; |
1091 return len; | 1133 return len; |
1092 } | 1134 } |
1285 return 1; | 1327 return 1; |
1286 return MB_BYTE2LEN(*p); | 1328 return MB_BYTE2LEN(*p); |
1287 } | 1329 } |
1288 | 1330 |
1289 /* | 1331 /* |
1332 * mb_ptr2cells_len() function pointer. | |
1333 * Like mb_ptr2cells(), but limit string length to "size". | |
1334 * For an empty string or truncated character returns 1. | |
1335 */ | |
1336 int | |
1337 latin_ptr2cells_len(p, size) | |
1338 char_u *p UNUSED; | |
1339 int size UNUSED; | |
1340 { | |
1341 return 1; | |
1342 } | |
1343 | |
1344 static int | |
1345 utf_ptr2cells_len(p, size) | |
1346 char_u *p; | |
1347 int size; | |
1348 { | |
1349 int c; | |
1350 | |
1351 /* Need to convert to a wide character. */ | |
1352 if (size > 0 && *p >= 0x80) | |
1353 { | |
1354 if (utf_ptr2len_len(p, size) < utf8len_tab[*p]) | |
1355 return 1; | |
1356 c = utf_ptr2char(p); | |
1357 /* An illegal byte is displayed as <xx>. */ | |
1358 if (utf_ptr2len(p) == 1 || c == NUL) | |
1359 return 4; | |
1360 /* If the char is ASCII it must be an overlong sequence. */ | |
1361 if (c < 0x80) | |
1362 return char2cells(c); | |
1363 return utf_char2cells(c); | |
1364 } | |
1365 return 1; | |
1366 } | |
1367 | |
1368 static int | |
1369 dbcs_ptr2cells_len(p, size) | |
1370 char_u *p; | |
1371 int size; | |
1372 { | |
1373 /* Number of cells is equal to number of bytes, except for euc-jp when | |
1374 * the first byte is 0x8e. */ | |
1375 if (size <= 1 || (enc_dbcs == DBCS_JPNU && *p == 0x8e)) | |
1376 return 1; | |
1377 return MB_BYTE2LEN(*p); | |
1378 } | |
1379 | |
1380 /* | |
1290 * mb_char2cells() function pointer. | 1381 * mb_char2cells() function pointer. |
1291 * Return the number of display cells character "c" occupies. | 1382 * Return the number of display cells character "c" occupies. |
1292 * Only takes care of multi-byte chars, not "^C" and such. | 1383 * Only takes care of multi-byte chars, not "^C" and such. |
1293 */ | 1384 */ |
1294 int | 1385 int |
1714 } | 1805 } |
1715 | 1806 |
1716 /* | 1807 /* |
1717 * Return the number of bytes the UTF-8 encoding of the character at "p[size]" | 1808 * Return the number of bytes the UTF-8 encoding of the character at "p[size]" |
1718 * takes. This includes following composing characters. | 1809 * takes. This includes following composing characters. |
1810 * Returns 0 for an empty string. | |
1719 * Returns 1 for an illegal char or an incomplete byte sequence. | 1811 * Returns 1 for an illegal char or an incomplete byte sequence. |
1720 */ | 1812 */ |
1721 int | 1813 int |
1722 utfc_ptr2len_len(p, size) | 1814 utfc_ptr2len_len(p, size) |
1723 char_u *p; | 1815 char_u *p; |
1726 int len; | 1818 int len; |
1727 #ifdef FEAT_ARABIC | 1819 #ifdef FEAT_ARABIC |
1728 int prevlen; | 1820 int prevlen; |
1729 #endif | 1821 #endif |
1730 | 1822 |
1731 if (*p == NUL) | 1823 if (size < 1 || *p == NUL) |
1732 return 0; | 1824 return 0; |
1733 if (p[0] < 0x80 && (size == 1 || p[1] < 0x80)) /* be quick for ASCII */ | 1825 if (p[0] < 0x80 && (size == 1 || p[1] < 0x80)) /* be quick for ASCII */ |
1734 return 1; | 1826 return 1; |
1735 | 1827 |
1736 /* Skip over first UTF-8 char, stopping at a NUL byte. */ | 1828 /* Skip over first UTF-8 char, stopping at a NUL byte. */ |