comparison src/mbyte.c @ 26:404aac550f35 v7.0017

updated for version 7.0017
author vimboss
date Thu, 07 Oct 2004 21:02:47 +0000
parents 3f44e9abe4ec
children 410fa1a31baf
comparison
equal deleted inserted replaced
25:8cde1a064c7c 26:404aac550f35
214 #define IDX_ISO_13 12 214 #define IDX_ISO_13 12
215 {"iso-8859-13", ENC_8BIT, 0}, 215 {"iso-8859-13", ENC_8BIT, 0},
216 #define IDX_ISO_14 13 216 #define IDX_ISO_14 13
217 {"iso-8859-14", ENC_8BIT, 0}, 217 {"iso-8859-14", ENC_8BIT, 0},
218 #define IDX_ISO_15 14 218 #define IDX_ISO_15 14
219 {"iso-8859-15", ENC_8BIT, 0}, 219 {"iso-8859-15", ENC_8BIT + ENC_LATIN9, 0},
220 #define IDX_KOI8_R 15 220 #define IDX_KOI8_R 15
221 {"koi8-r", ENC_8BIT, 0}, 221 {"koi8-r", ENC_8BIT, 0},
222 #define IDX_KOI8_U 16 222 #define IDX_KOI8_U 16
223 {"koi8-u", ENC_8BIT, 0}, 223 {"koi8-u", ENC_8BIT, 0},
224 #define IDX_UTF8 17 224 #define IDX_UTF8 17
532 enc_dbcs = enc_dbcs_new; 532 enc_dbcs = enc_dbcs_new;
533 has_mbyte = (enc_dbcs != 0 || enc_utf8); 533 has_mbyte = (enc_dbcs != 0 || enc_utf8);
534 534
535 #ifdef WIN3264 535 #ifdef WIN3264
536 enc_codepage = encname2codepage(p_enc); 536 enc_codepage = encname2codepage(p_enc);
537 enc_latin9 = (STRCMP(p_enc, "iso-8859-15") == 0);
537 #endif 538 #endif
538 539
539 /* 540 /*
540 * Set the function pointers. 541 * Set the function pointers.
541 */ 542 */
2484 2485
2485 /* Return 1 when on the lead byte, 0 when on the tail byte. */ 2486 /* Return 1 when on the lead byte, 0 when on the tail byte. */
2486 return 1 - dbcs_head_off(base, p); 2487 return 1 - dbcs_head_off(base, p);
2487 } 2488 }
2488 2489
2490 #if defined(HAVE_GTK2) || defined(PROTO)
2491 /*
2492 * Return TRUE if string "s" is a structurally valid utf-8 string, FALSE otherwise.
2493 * When "end" is NULL stop at the first NUL.
2494 * When "end" is not NULL stop there: only the bytes before "end" are checked.
2495 * Only the lead/trail byte pattern and the sequence length taken from
2495 * utf8len_tab[] are checked; the decoded character values are not examined.
2495 */
2496 int
2497 utf_valid_string(s, end)
2498 char_u *s; /* start of the bytes to check */
2499 char_u *end; /* first byte not to check, or NULL for a NUL-terminated string */
2500 {
2501 int l; /* length in bytes of the sequence being checked */
2502 char_u *p = s;
2503
2504 while (end == NULL ? *p != NUL : p < end)
2505 {
2506 if ((*p & 0xc0) == 0x80)
2507 return FALSE; /* invalid lead byte: 10xxxxxx is a trail byte */
2508 l = utf8len_tab[*p]; /* sequence length looked up from the lead byte */
2509 if (end != NULL && p + l > end)
2510 return FALSE; /* incomplete byte sequence: it would run past "end" */
2511 ++p; /* step over the lead byte */
2512 while (--l > 0) /* the remaining l - 1 bytes must all be trail bytes */
2513 if ((*p++ & 0xc0) != 0x80)
2514 return FALSE; /* invalid trail byte */
2515 }
2516 return TRUE;
2517 }
2518 #endif
2519
2489 #if defined(FEAT_GUI) || defined(PROTO) 2520 #if defined(FEAT_GUI) || defined(PROTO)
2490 /* 2521 /*
2491 * Special version of mb_tail_off() for use in ScreenLines[]. 2522 * Special version of mb_tail_off() for use in ScreenLines[].
2492 */ 2523 */
2493 int 2524 int
5451 { 5482 {
5452 /* Internal latin1 -> utf-8 conversion. */ 5483 /* Internal latin1 -> utf-8 conversion. */
5453 vcp->vc_type = CONV_TO_UTF8; 5484 vcp->vc_type = CONV_TO_UTF8;
5454 vcp->vc_factor = 2; /* up to twice as long */ 5485 vcp->vc_factor = 2; /* up to twice as long */
5455 } 5486 }
5487 else if ((from_prop & ENC_LATIN9) && (to_prop & ENC_UNICODE))
5488 {
5489 /* Internal latin9 -> utf-8 conversion. */
5490 vcp->vc_type = CONV_9_TO_UTF8;
5491 vcp->vc_factor = 3; /* up to three times as long (euro sign) */
5492 }
5456 else if ((from_prop & ENC_UNICODE) && (to_prop & ENC_LATIN1)) 5493 else if ((from_prop & ENC_UNICODE) && (to_prop & ENC_LATIN1))
5457 { 5494 {
5458 /* Internal utf-8 -> latin1 conversion. */ 5495 /* Internal utf-8 -> latin1 conversion. */
5459 vcp->vc_type = CONV_TO_LATIN1; 5496 vcp->vc_type = CONV_TO_LATIN1;
5497 }
5498 else if ((from_prop & ENC_UNICODE) && (to_prop & ENC_LATIN9))
5499 {
5500 /* Internal utf-8 -> latin9 conversion. */
5501 vcp->vc_type = CONV_TO_LATIN9;
5460 } 5502 }
5461 #ifdef WIN3264 5503 #ifdef WIN3264
5462 /* Win32-specific codepage <-> codepage conversion without iconv. */ 5504 /* Win32-specific codepage <-> codepage conversion without iconv. */
5463 else if (((from_prop & ENC_UNICODE) || encname2codepage(from) > 0) 5505 else if (((from_prop & ENC_UNICODE) || encname2codepage(from) > 0)
5464 && ((to_prop & ENC_UNICODE) || encname2codepage(to) > 0)) 5506 && ((to_prop & ENC_UNICODE) || encname2codepage(to) > 0))
5620 if (retval == NULL) 5662 if (retval == NULL)
5621 break; 5663 break;
5622 d = retval; 5664 d = retval;
5623 for (i = 0; i < len; ++i) 5665 for (i = 0; i < len; ++i)
5624 { 5666 {
5625 if (ptr[i] < 0x80) 5667 c = ptr[i];
5626 *d++ = ptr[i]; 5668 if (c < 0x80)
5669 *d++ = c;
5627 else 5670 else
5628 { 5671 {
5629 *d++ = 0xc0 + ((unsigned)ptr[i] >> 6); 5672 *d++ = 0xc0 + ((unsigned)c >> 6);
5630 *d++ = 0x80 + (ptr[i] & 0x3f); 5673 *d++ = 0x80 + (c & 0x3f);
5631 } 5674 }
5632 } 5675 }
5633 *d = NUL; 5676 *d = NUL;
5634 if (lenp != NULL) 5677 if (lenp != NULL)
5635 *lenp = (int)(d - retval); 5678 *lenp = (int)(d - retval);
5636 break; 5679 break;
5637 5680
5681 case CONV_9_TO_UTF8: /* latin9 to utf-8 conversion */
5682 retval = alloc(len * 3 + 1);
5683 if (retval == NULL)
5684 break;
5685 d = retval;
5686 for (i = 0; i < len; ++i)
5687 {
5688 c = ptr[i];
5689 switch (c)
5690 {
5691 case 0xa4: c = 0x20ac; break; /* euro */
5692 case 0xa6: c = 0x0160; break; /* S hat */
5693 case 0xa8: c = 0x0161; break; /* S -hat */
5694 case 0xb4: c = 0x017d; break; /* Z hat */
5695 case 0xb8: c = 0x017e; break; /* Z -hat */
5696 case 0xbc: c = 0x0152; break; /* OE */
5697 case 0xbd: c = 0x0153; break; /* oe */
5698 case 0xbe: c = 0x0178; break; /* Y */
5699 }
5700 d += utf_char2bytes(c, d);
5701 }
5702 *d = NUL;
5703 if (lenp != NULL)
5704 *lenp = (int)(d - retval);
5705 break;
5706
5638 case CONV_TO_LATIN1: /* utf-8 to latin1 conversion */ 5707 case CONV_TO_LATIN1: /* utf-8 to latin1 conversion */
5708 case CONV_TO_LATIN9: /* utf-8 to latin9 conversion */
5639 retval = alloc(len + 1); 5709 retval = alloc(len + 1);
5640 if (retval == NULL) 5710 if (retval == NULL)
5641 break; 5711 break;
5642 d = retval; 5712 d = retval;
5643 for (i = 0; i < len; ++i) 5713 for (i = 0; i < len; ++i)
5656 *d++ = ptr[i]; 5726 *d++ = ptr[i];
5657 } 5727 }
5658 else 5728 else
5659 { 5729 {
5660 c = utf_ptr2char(ptr + i); 5730 c = utf_ptr2char(ptr + i);
5731 if (vcp->vc_type == CONV_TO_LATIN9)
5732 switch (c)
5733 {
5734 case 0x20ac: c = 0xa4; break; /* euro */
5735 case 0x0160: c = 0xa6; break; /* S hat */
5736 case 0x0161: c = 0xa8; break; /* S -hat */
5737 case 0x017d: c = 0xb4; break; /* Z hat */
5738 case 0x017e: c = 0xb8; break; /* Z -hat */
5739 case 0x0152: c = 0xbc; break; /* OE */
5740 case 0x0153: c = 0xbd; break; /* oe */
5741 case 0x0178: c = 0xbe; break; /* Y */
5742 case 0xa4:
5743 case 0xa6:
5744 case 0xa8:
5745 case 0xb4:
5746 case 0xb8:
5747 case 0xbc:
5748 case 0xbd:
5749 case 0xbe: c = 0x100; break; /* not in latin9 */
5750 }
5661 if (!utf_iscomposing(c)) /* skip composing chars */ 5751 if (!utf_iscomposing(c)) /* skip composing chars */
5662 { 5752 {
5663 if (c < 0x100) 5753 if (c < 0x100)
5664 *d++ = c; 5754 *d++ = c;
5665 else if (vcp->vc_fail) 5755 else if (vcp->vc_fail)