Mercurial > vim
comparison src/mbyte.c @ 26:404aac550f35 v7.0017
updated for version 7.0017
author | vimboss |
---|---|
date | Thu, 07 Oct 2004 21:02:47 +0000 |
parents | 3f44e9abe4ec |
children | 410fa1a31baf |
comparison
equal
deleted
inserted
replaced
25:8cde1a064c7c | 26:404aac550f35 |
---|---|
214 #define IDX_ISO_13 12 | 214 #define IDX_ISO_13 12 |
215 {"iso-8859-13", ENC_8BIT, 0}, | 215 {"iso-8859-13", ENC_8BIT, 0}, |
216 #define IDX_ISO_14 13 | 216 #define IDX_ISO_14 13 |
217 {"iso-8859-14", ENC_8BIT, 0}, | 217 {"iso-8859-14", ENC_8BIT, 0}, |
218 #define IDX_ISO_15 14 | 218 #define IDX_ISO_15 14 |
219 {"iso-8859-15", ENC_8BIT, 0}, | 219 {"iso-8859-15", ENC_8BIT + ENC_LATIN9, 0}, |
220 #define IDX_KOI8_R 15 | 220 #define IDX_KOI8_R 15 |
221 {"koi8-r", ENC_8BIT, 0}, | 221 {"koi8-r", ENC_8BIT, 0}, |
222 #define IDX_KOI8_U 16 | 222 #define IDX_KOI8_U 16 |
223 {"koi8-u", ENC_8BIT, 0}, | 223 {"koi8-u", ENC_8BIT, 0}, |
224 #define IDX_UTF8 17 | 224 #define IDX_UTF8 17 |
532 enc_dbcs = enc_dbcs_new; | 532 enc_dbcs = enc_dbcs_new; |
533 has_mbyte = (enc_dbcs != 0 || enc_utf8); | 533 has_mbyte = (enc_dbcs != 0 || enc_utf8); |
534 | 534 |
535 #ifdef WIN3264 | 535 #ifdef WIN3264 |
536 enc_codepage = encname2codepage(p_enc); | 536 enc_codepage = encname2codepage(p_enc); |
537 enc_latin9 = (STRCMP(p_enc, "iso-8859-15") == 0); | |
537 #endif | 538 #endif |
538 | 539 |
539 /* | 540 /* |
540 * Set the function pointers. | 541 * Set the function pointers. |
541 */ | 542 */ |
2484 | 2485 |
2485 /* Return 1 when on the lead byte, 0 when on the tail byte. */ | 2486 /* Return 1 when on the lead byte, 0 when on the tail byte. */ |
2486 return 1 - dbcs_head_off(base, p); | 2487 return 1 - dbcs_head_off(base, p); |
2487 } | 2488 } |
2488 | 2489 |
2490 #if defined(HAVE_GTK2) || defined(PROTO) | |
2491 /* | |
2492 * Return TRUE if string "s" is a valid utf-8 string: every lead byte must be followed by the number of trail bytes that utf8len_tab[] gives for it. | |
2493 * When "end" is NULL stop at the first NUL. | |
2494 * When "end" is not NULL stop there; a sequence that would run past "end" makes the result FALSE. | |
2495 */ | |
2496 int | |
2497 utf_valid_string(s, end) | |
2498 char_u *s; | |
2499 char_u *end; | |
2500 { | |
2501 int l; | |
2502 char_u *p = s; | |
2503 | |
2504 while (end == NULL ? *p != NUL : p < end) | |
2505 { | |
2506 if ((*p & 0xc0) == 0x80) | |
2507 return FALSE; /* invalid lead byte */ | |
2508 l = utf8len_tab[*p]; /* sequence length for this lead byte */ | |
2509 if (end != NULL && p + l > end) | |
2510 return FALSE; /* incomplete byte sequence */ | |
2511 ++p; /* skip the lead byte */ | |
2512 while (--l > 0) | |
2513 if ((*p++ & 0xc0) != 0x80) | |
2514 return FALSE; /* invalid trail byte */ | |
2515 } | |
2516 return TRUE; | |
2517 } | |
2518 #endif | |
2519 | |
2489 #if defined(FEAT_GUI) || defined(PROTO) | 2520 #if defined(FEAT_GUI) || defined(PROTO) |
2490 /* | 2521 /* |
2491 * Special version of mb_tail_off() for use in ScreenLines[]. | 2522 * Special version of mb_tail_off() for use in ScreenLines[]. |
2492 */ | 2523 */ |
2493 int | 2524 int |
5451 { | 5482 { |
5452 /* Internal latin1 -> utf-8 conversion. */ | 5483 /* Internal latin1 -> utf-8 conversion. */ |
5453 vcp->vc_type = CONV_TO_UTF8; | 5484 vcp->vc_type = CONV_TO_UTF8; |
5454 vcp->vc_factor = 2; /* up to twice as long */ | 5485 vcp->vc_factor = 2; /* up to twice as long */ |
5455 } | 5486 } |
5487 else if ((from_prop & ENC_LATIN9) && (to_prop & ENC_UNICODE)) | |
5488 { | |
5489 /* Internal latin9 -> utf-8 conversion. */ | |
5490 vcp->vc_type = CONV_9_TO_UTF8; | |
5491 vcp->vc_factor = 3; /* up to three times as long (euro sign) */ | |
5492 } | |
5456 else if ((from_prop & ENC_UNICODE) && (to_prop & ENC_LATIN1)) | 5493 else if ((from_prop & ENC_UNICODE) && (to_prop & ENC_LATIN1)) |
5457 { | 5494 { |
5458 /* Internal utf-8 -> latin1 conversion. */ | 5495 /* Internal utf-8 -> latin1 conversion. */ |
5459 vcp->vc_type = CONV_TO_LATIN1; | 5496 vcp->vc_type = CONV_TO_LATIN1; |
5497 } | |
5498 else if ((from_prop & ENC_UNICODE) && (to_prop & ENC_LATIN9)) | |
5499 { | |
5500 /* Internal utf-8 -> latin9 conversion. */ | |
5501 vcp->vc_type = CONV_TO_LATIN9; | |
5460 } | 5502 } |
5461 #ifdef WIN3264 | 5503 #ifdef WIN3264 |
5462 /* Win32-specific codepage <-> codepage conversion without iconv. */ | 5504 /* Win32-specific codepage <-> codepage conversion without iconv. */ |
5463 else if (((from_prop & ENC_UNICODE) || encname2codepage(from) > 0) | 5505 else if (((from_prop & ENC_UNICODE) || encname2codepage(from) > 0) |
5464 && ((to_prop & ENC_UNICODE) || encname2codepage(to) > 0)) | 5506 && ((to_prop & ENC_UNICODE) || encname2codepage(to) > 0)) |
5620 if (retval == NULL) | 5662 if (retval == NULL) |
5621 break; | 5663 break; |
5622 d = retval; | 5664 d = retval; |
5623 for (i = 0; i < len; ++i) | 5665 for (i = 0; i < len; ++i) |
5624 { | 5666 { |
5625 if (ptr[i] < 0x80) | 5667 c = ptr[i]; |
5626 *d++ = ptr[i]; | 5668 if (c < 0x80) |
5669 *d++ = c; | |
5627 else | 5670 else |
5628 { | 5671 { |
5629 *d++ = 0xc0 + ((unsigned)ptr[i] >> 6); | 5672 *d++ = 0xc0 + ((unsigned)c >> 6); |
5630 *d++ = 0x80 + (ptr[i] & 0x3f); | 5673 *d++ = 0x80 + (c & 0x3f); |
5631 } | 5674 } |
5632 } | 5675 } |
5633 *d = NUL; | 5676 *d = NUL; |
5634 if (lenp != NULL) | 5677 if (lenp != NULL) |
5635 *lenp = (int)(d - retval); | 5678 *lenp = (int)(d - retval); |
5636 break; | 5679 break; |
5637 | 5680 |
5681 case CONV_9_TO_UTF8: /* latin9 to utf-8 conversion */ | |
5682 retval = alloc(len * 3 + 1); | |
5683 if (retval == NULL) | |
5684 break; | |
5685 d = retval; | |
5686 for (i = 0; i < len; ++i) | |
5687 { | |
5688 c = ptr[i]; | |
5689 switch (c) | |
5690 { | |
5691 case 0xa4: c = 0x20ac; break; /* euro */ | |
5692 case 0xa6: c = 0x0160; break; /* S caron */ | |
5693 case 0xa8: c = 0x0161; break; /* s caron */ | |
5694 case 0xb4: c = 0x017d; break; /* Z caron */ | |
5695 case 0xb8: c = 0x017e; break; /* z caron */ | |
5696 case 0xbc: c = 0x0152; break; /* OE */ | |
5697 case 0xbd: c = 0x0153; break; /* oe */ | |
5698 case 0xbe: c = 0x0178; break; /* Y diaeresis */ | |
5699 } | |
5700 d += utf_char2bytes(c, d); | |
5701 } | |
5702 *d = NUL; | |
5703 if (lenp != NULL) | |
5704 *lenp = (int)(d - retval); | |
5705 break; | |
5706 | |
5638 case CONV_TO_LATIN1: /* utf-8 to latin1 conversion */ | 5707 case CONV_TO_LATIN1: /* utf-8 to latin1 conversion */ |
5708 case CONV_TO_LATIN9: /* utf-8 to latin9 conversion */ | |
5639 retval = alloc(len + 1); | 5709 retval = alloc(len + 1); |
5640 if (retval == NULL) | 5710 if (retval == NULL) |
5641 break; | 5711 break; |
5642 d = retval; | 5712 d = retval; |
5643 for (i = 0; i < len; ++i) | 5713 for (i = 0; i < len; ++i) |
5656 *d++ = ptr[i]; | 5726 *d++ = ptr[i]; |
5657 } | 5727 } |
5658 else | 5728 else |
5659 { | 5729 { |
5660 c = utf_ptr2char(ptr + i); | 5730 c = utf_ptr2char(ptr + i); |
5731 if (vcp->vc_type == CONV_TO_LATIN9) | |
5732 switch (c) | |
5733 { | |
5734 case 0x20ac: c = 0xa4; break; /* euro */ | |
5735 case 0x0160: c = 0xa6; break; /* S caron */ | |
5736 case 0x0161: c = 0xa8; break; /* s caron */ | |
5737 case 0x017d: c = 0xb4; break; /* Z caron */ | |
5738 case 0x017e: c = 0xb8; break; /* z caron */ | |
5739 case 0x0152: c = 0xbc; break; /* OE */ | |
5740 case 0x0153: c = 0xbd; break; /* oe */ | |
5741 case 0x0178: c = 0xbe; break; /* Y diaeresis */ | |
5742 case 0xa4: | |
5743 case 0xa6: | |
5744 case 0xa8: | |
5745 case 0xb4: | |
5746 case 0xb8: | |
5747 case 0xbc: | |
5748 case 0xbd: | |
5749 case 0xbe: c = 0x100; break; /* not in latin9 */ | |
5750 } | |
5661 if (!utf_iscomposing(c)) /* skip composing chars */ | 5751 if (!utf_iscomposing(c)) /* skip composing chars */ |
5662 { | 5752 { |
5663 if (c < 0x100) | 5753 if (c < 0x100) |
5664 *d++ = c; | 5754 *d++ = c; |
5665 else if (vcp->vc_fail) | 5755 else if (vcp->vc_fail) |