comparison src/mbyte.c @ 1903:4c02214d1465 v7.2.200

updated for version 7.2-200
author vimboss
date Tue, 16 Jun 2009 13:12:07 +0000
parents c8f343a465a2
children 062104a823df
comparison
equal deleted inserted replaced
1902:5d1ca05fa8ff 1903:4c02214d1465
125 125
126 static int enc_canon_search __ARGS((char_u *name)); 126 static int enc_canon_search __ARGS((char_u *name));
127 static int dbcs_char2len __ARGS((int c)); 127 static int dbcs_char2len __ARGS((int c));
128 static int dbcs_char2bytes __ARGS((int c, char_u *buf)); 128 static int dbcs_char2bytes __ARGS((int c, char_u *buf));
129 static int dbcs_ptr2len __ARGS((char_u *p)); 129 static int dbcs_ptr2len __ARGS((char_u *p));
130 static int dbcs_ptr2len_len __ARGS((char_u *p, int size));
131 static int utf_ptr2cells_len __ARGS((char_u *p, int size));
130 static int dbcs_char2cells __ARGS((int c)); 132 static int dbcs_char2cells __ARGS((int c));
133 static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
131 static int dbcs_ptr2char __ARGS((char_u *p)); 134 static int dbcs_ptr2char __ARGS((char_u *p));
132 135
133 /* Lookup table to quickly get the length in bytes of a UTF-8 character from 136 /* Lookup table to quickly get the length in bytes of a UTF-8 character from
134 * the first byte of a UTF-8 string. Bytes which are illegal when used as the 137 * the first byte of a UTF-8 string. Bytes which are illegal when used as the
135 * first byte have a one, because these will be used separately. */ 138 * first byte have a one, because these will be used separately. */
604 * Set the function pointers. 607 * Set the function pointers.
605 */ 608 */
606 if (enc_utf8) 609 if (enc_utf8)
607 { 610 {
608 mb_ptr2len = utfc_ptr2len; 611 mb_ptr2len = utfc_ptr2len;
612 mb_ptr2len_len = utfc_ptr2len_len;
609 mb_char2len = utf_char2len; 613 mb_char2len = utf_char2len;
610 mb_char2bytes = utf_char2bytes; 614 mb_char2bytes = utf_char2bytes;
611 mb_ptr2cells = utf_ptr2cells; 615 mb_ptr2cells = utf_ptr2cells;
616 mb_ptr2cells_len = utf_ptr2cells_len;
612 mb_char2cells = utf_char2cells; 617 mb_char2cells = utf_char2cells;
613 mb_off2cells = utf_off2cells; 618 mb_off2cells = utf_off2cells;
614 mb_ptr2char = utf_ptr2char; 619 mb_ptr2char = utf_ptr2char;
615 mb_head_off = utf_head_off; 620 mb_head_off = utf_head_off;
616 } 621 }
617 else if (enc_dbcs != 0) 622 else if (enc_dbcs != 0)
618 { 623 {
619 mb_ptr2len = dbcs_ptr2len; 624 mb_ptr2len = dbcs_ptr2len;
625 mb_ptr2len_len = dbcs_ptr2len_len;
620 mb_char2len = dbcs_char2len; 626 mb_char2len = dbcs_char2len;
621 mb_char2bytes = dbcs_char2bytes; 627 mb_char2bytes = dbcs_char2bytes;
622 mb_ptr2cells = dbcs_ptr2cells; 628 mb_ptr2cells = dbcs_ptr2cells;
629 mb_ptr2cells_len = dbcs_ptr2cells_len;
623 mb_char2cells = dbcs_char2cells; 630 mb_char2cells = dbcs_char2cells;
624 mb_off2cells = dbcs_off2cells; 631 mb_off2cells = dbcs_off2cells;
625 mb_ptr2char = dbcs_ptr2char; 632 mb_ptr2char = dbcs_ptr2char;
626 mb_head_off = dbcs_head_off; 633 mb_head_off = dbcs_head_off;
627 } 634 }
628 else 635 else
629 { 636 {
630 mb_ptr2len = latin_ptr2len; 637 mb_ptr2len = latin_ptr2len;
638 mb_ptr2len_len = latin_ptr2len_len;
631 mb_char2len = latin_char2len; 639 mb_char2len = latin_char2len;
632 mb_char2bytes = latin_char2bytes; 640 mb_char2bytes = latin_char2bytes;
633 mb_ptr2cells = latin_ptr2cells; 641 mb_ptr2cells = latin_ptr2cells;
642 mb_ptr2cells_len = latin_ptr2cells_len;
634 mb_char2cells = latin_char2cells; 643 mb_char2cells = latin_char2cells;
635 mb_off2cells = latin_off2cells; 644 mb_off2cells = latin_off2cells;
636 mb_ptr2char = latin_ptr2char; 645 mb_ptr2char = latin_ptr2char;
637 mb_head_off = latin_head_off; 646 mb_head_off = latin_head_off;
638 } 647 }
1067 /* 1076 /*
1068 * mb_ptr2len() function pointer. 1077 * mb_ptr2len() function pointer.
1069 * Get byte length of character at "*p" but stop at a NUL. 1078 * Get byte length of character at "*p" but stop at a NUL.
1070 * For UTF-8 this includes following composing characters. 1079 * For UTF-8 this includes following composing characters.
1071 * Returns 0 when *p is NUL. 1080 * Returns 0 when *p is NUL.
1072 *
1073 */ 1081 */
1074 int 1082 int
1075 latin_ptr2len(p) 1083 latin_ptr2len(p)
1076 char_u *p; 1084 char_u *p;
1077 { 1085 {
1083 char_u *p; 1091 char_u *p;
1084 { 1092 {
1085 int len; 1093 int len;
1086 1094
1087 /* Check if second byte is not missing. */ 1095 /* Check if second byte is not missing. */
1096 len = MB_BYTE2LEN(*p);
1097 if (len == 2 && p[1] == NUL)
1098 len = 1;
1099 return len;
1100 }
1101
1102 /*
1103 * mb_ptr2len_len() function pointer.
1104 * Like mb_ptr2len(), but limit to read "size" bytes.
1105 * Returns 0 for an empty string.
1106 * Returns 1 for an illegal char or an incomplete byte sequence.
1107 */
1108 int
1109 latin_ptr2len_len(p, size)
1110 char_u *p;
1111 int size;
1112 {
1113 if (size < 1 || *p == NUL)
1114 return 0;
1115 return 1;
1116 }
1117
1118 static int
1119 dbcs_ptr2len_len(p, size)
1120 char_u *p;
1121 int size;
1122 {
1123 int len;
1124
1125 if (size < 1 || *p == NUL)
1126 return 0;
1127 if (size == 1)
1128 return 1;
1129 /* Check that second byte is not missing. */
1088 len = MB_BYTE2LEN(*p); 1130 len = MB_BYTE2LEN(*p);
1089 if (len == 2 && p[1] == NUL) 1131 if (len == 2 && p[1] == NUL)
1090 len = 1; 1132 len = 1;
1091 return len; 1133 return len;
1092 } 1134 }
1285 return 1; 1327 return 1;
1286 return MB_BYTE2LEN(*p); 1328 return MB_BYTE2LEN(*p);
1287 } 1329 }
1288 1330
1289 /* 1331 /*
1332 * mb_ptr2cells_len() function pointer.
1333 * Like mb_ptr2cells(), but limit string length to "size".
1334 * For an empty string or truncated character returns 1.
1335 */
1336 int
1337 latin_ptr2cells_len(p, size)
1338 char_u *p UNUSED;
1339 int size UNUSED;
1340 {
1341 return 1;
1342 }
1343
1344 static int
1345 utf_ptr2cells_len(p, size)
1346 char_u *p;
1347 int size;
1348 {
1349 int c;
1350
1351 /* Need to convert to a wide character. */
1352 if (size > 0 && *p >= 0x80)
1353 {
1354 if (utf_ptr2len_len(p, size) < utf8len_tab[*p])
1355 return 1;
1356 c = utf_ptr2char(p);
1357 /* An illegal byte is displayed as <xx>. */
1358 if (utf_ptr2len(p) == 1 || c == NUL)
1359 return 4;
1360 /* If the char is ASCII it must be an overlong sequence. */
1361 if (c < 0x80)
1362 return char2cells(c);
1363 return utf_char2cells(c);
1364 }
1365 return 1;
1366 }
1367
1368 static int
1369 dbcs_ptr2cells_len(p, size)
1370 char_u *p;
1371 int size;
1372 {
1373 /* Number of cells is equal to number of bytes, except for euc-jp when
1374 * the first byte is 0x8e. */
1375 if (size <= 1 || (enc_dbcs == DBCS_JPNU && *p == 0x8e))
1376 return 1;
1377 return MB_BYTE2LEN(*p);
1378 }
1379
1380 /*
1290 * mb_char2cells() function pointer. 1381 * mb_char2cells() function pointer.
1291 * Return the number of display cells character "c" occupies. 1382 * Return the number of display cells character "c" occupies.
1292 * Only takes care of multi-byte chars, not "^C" and such. 1383 * Only takes care of multi-byte chars, not "^C" and such.
1293 */ 1384 */
1294 int 1385 int
1714 } 1805 }
1715 1806
1716 /* 1807 /*
1717 * Return the number of bytes the UTF-8 encoding of the character at "p[size]" 1808 * Return the number of bytes the UTF-8 encoding of the character at "p[size]"
1718 * takes. This includes following composing characters. 1809 * takes. This includes following composing characters.
1810 * Returns 0 for an empty string.
1719 * Returns 1 for an illegal char or an incomplete byte sequence. 1811 * Returns 1 for an illegal char or an incomplete byte sequence.
1720 */ 1812 */
1721 int 1813 int
1722 utfc_ptr2len_len(p, size) 1814 utfc_ptr2len_len(p, size)
1723 char_u *p; 1815 char_u *p;
1726 int len; 1818 int len;
1727 #ifdef FEAT_ARABIC 1819 #ifdef FEAT_ARABIC
1728 int prevlen; 1820 int prevlen;
1729 #endif 1821 #endif
1730 1822
1731 if (*p == NUL) 1823 if (size < 1 || *p == NUL)
1732 return 0; 1824 return 0;
1733 if (p[0] < 0x80 && (size == 1 || p[1] < 0x80)) /* be quick for ASCII */ 1825 if (p[0] < 0x80 && (size == 1 || p[1] < 0x80)) /* be quick for ASCII */
1734 return 1; 1826 return 1;
1735 1827
1736 /* Skip over first UTF-8 char, stopping at a NUL byte. */ 1828 /* Skip over first UTF-8 char, stopping at a NUL byte. */