comparison src/mbyte.c @ 18800:f41b55f9357c v8.1.2388

patch 8.1.2388: using old C style comments Commit: https://github.com/vim/vim/commit/4ba37b5833de99db9e9afe8928b31c864182405c Author: Bram Moolenaar <Bram@vim.org> Date: Wed Dec 4 21:57:43 2019 +0100 patch 8.1.2388: using old C style comments Problem: Using old C style comments. Solution: Use // comments where appropriate.
author Bram Moolenaar <Bram@vim.org>
date Wed, 04 Dec 2019 22:00:04 +0100
parents e9b2ade1adbd
children 2ef19eed524a
comparison
equal deleted inserted replaced
18799:d7d0942e231b 18800:f41b55f9357c
89 # else 89 # else
90 # include <windows.h> 90 # include <windows.h>
91 # define WINBYTE BYTE 91 # define WINBYTE BYTE
92 # endif 92 # endif
93 # ifdef WIN32 93 # ifdef WIN32
94 # undef WIN32 /* Some windows.h define WIN32, we don't want that here. */ 94 # undef WIN32 // Some windows.h define WIN32, we don't want that here.
95 # endif 95 # endif
96 #else 96 #else
97 # define WINBYTE BYTE 97 # define WINBYTE BYTE
98 #endif 98 #endif
99 99
127 #ifdef HAVE_WCHAR_H 127 #ifdef HAVE_WCHAR_H
128 # include <wchar.h> 128 # include <wchar.h>
129 #endif 129 #endif
130 130
131 #if 0 131 #if 0
132 /* This has been disabled, because several people reported problems with the 132 // This has been disabled, because several people reported problems with the
133 * wcwidth() and iswprint() library functions, esp. for Hebrew. */ 133 // wcwidth() and iswprint() library functions, esp. for Hebrew.
134 # ifdef __STDC_ISO_10646__ 134 # ifdef __STDC_ISO_10646__
135 # define USE_WCHAR_FUNCTIONS 135 # define USE_WCHAR_FUNCTIONS
136 # endif 136 # endif
137 #endif 137 #endif
138 138
181 181
182 /* 182 /*
183 * XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks 183 * XIM often causes trouble. Define XIM_DEBUG to get a log of XIM callbacks
184 * in the "xim.log" file. 184 * in the "xim.log" file.
185 */ 185 */
186 /* #define XIM_DEBUG */ 186 // #define XIM_DEBUG
187 #ifdef XIM_DEBUG 187 #ifdef XIM_DEBUG
188 static void 188 static void
189 xim_log(char *s, ...) 189 xim_log(char *s, ...)
190 { 190 {
191 va_list arglist; 191 va_list arglist;
264 #define IDX_UCS4 21 264 #define IDX_UCS4 21
265 {"ucs-4", ENC_UNICODE + ENC_ENDIAN_B + ENC_4BYTE, 0}, 265 {"ucs-4", ENC_UNICODE + ENC_ENDIAN_B + ENC_4BYTE, 0},
266 #define IDX_UCS4LE 22 266 #define IDX_UCS4LE 22
267 {"ucs-4le", ENC_UNICODE + ENC_ENDIAN_L + ENC_4BYTE, 0}, 267 {"ucs-4le", ENC_UNICODE + ENC_ENDIAN_L + ENC_4BYTE, 0},
268 268
269 /* For debugging DBCS encoding on Unix. */ 269 // For debugging DBCS encoding on Unix.
270 #define IDX_DEBUG 23 270 #define IDX_DEBUG 23
271 {"debug", ENC_DBCS, DBCS_DEBUG}, 271 {"debug", ENC_DBCS, DBCS_DEBUG},
272 #define IDX_EUC_JP 24 272 #define IDX_EUC_JP 24
273 {"euc-jp", ENC_DBCS, DBCS_JPNU}, 273 {"euc-jp", ENC_DBCS, DBCS_JPNU},
274 #define IDX_SJIS 25 274 #define IDX_SJIS 25
280 #define IDX_EUC_TW 28 280 #define IDX_EUC_TW 28
281 {"euc-tw", ENC_DBCS, DBCS_CHTU}, 281 {"euc-tw", ENC_DBCS, DBCS_CHTU},
282 #define IDX_BIG5 29 282 #define IDX_BIG5 29
283 {"big5", ENC_DBCS, DBCS_CHT}, 283 {"big5", ENC_DBCS, DBCS_CHT},
284 284
285 /* MS-DOS and MS-Windows codepages are included here, so that they can be 285 // MS-DOS and MS-Windows codepages are included here, so that they can be
286 * used on Unix too. Most of them are similar to ISO-8859 encodings, but 286 // used on Unix too. Most of them are similar to ISO-8859 encodings, but
287 * not exactly the same. */ 287 // not exactly the same.
288 #define IDX_CP437 30 288 #define IDX_CP437 30
289 {"cp437", ENC_8BIT, 437}, /* like iso-8859-1 */ 289 {"cp437", ENC_8BIT, 437}, // like iso-8859-1
290 #define IDX_CP737 31 290 #define IDX_CP737 31
291 {"cp737", ENC_8BIT, 737}, /* like iso-8859-7 */ 291 {"cp737", ENC_8BIT, 737}, // like iso-8859-7
292 #define IDX_CP775 32 292 #define IDX_CP775 32
293 {"cp775", ENC_8BIT, 775}, /* Baltic */ 293 {"cp775", ENC_8BIT, 775}, // Baltic
294 #define IDX_CP850 33 294 #define IDX_CP850 33
295 {"cp850", ENC_8BIT, 850}, /* like iso-8859-4 */ 295 {"cp850", ENC_8BIT, 850}, // like iso-8859-4
296 #define IDX_CP852 34 296 #define IDX_CP852 34
297 {"cp852", ENC_8BIT, 852}, /* like iso-8859-1 */ 297 {"cp852", ENC_8BIT, 852}, // like iso-8859-1
298 #define IDX_CP855 35 298 #define IDX_CP855 35
299 {"cp855", ENC_8BIT, 855}, /* like iso-8859-2 */ 299 {"cp855", ENC_8BIT, 855}, // like iso-8859-2
300 #define IDX_CP857 36 300 #define IDX_CP857 36
301 {"cp857", ENC_8BIT, 857}, /* like iso-8859-5 */ 301 {"cp857", ENC_8BIT, 857}, // like iso-8859-5
302 #define IDX_CP860 37 302 #define IDX_CP860 37
303 {"cp860", ENC_8BIT, 860}, /* like iso-8859-9 */ 303 {"cp860", ENC_8BIT, 860}, // like iso-8859-9
304 #define IDX_CP861 38 304 #define IDX_CP861 38
305 {"cp861", ENC_8BIT, 861}, /* like iso-8859-1 */ 305 {"cp861", ENC_8BIT, 861}, // like iso-8859-1
306 #define IDX_CP862 39 306 #define IDX_CP862 39
307 {"cp862", ENC_8BIT, 862}, /* like iso-8859-1 */ 307 {"cp862", ENC_8BIT, 862}, // like iso-8859-1
308 #define IDX_CP863 40 308 #define IDX_CP863 40
309 {"cp863", ENC_8BIT, 863}, /* like iso-8859-8 */ 309 {"cp863", ENC_8BIT, 863}, // like iso-8859-8
310 #define IDX_CP865 41 310 #define IDX_CP865 41
311 {"cp865", ENC_8BIT, 865}, /* like iso-8859-1 */ 311 {"cp865", ENC_8BIT, 865}, // like iso-8859-1
312 #define IDX_CP866 42 312 #define IDX_CP866 42
313 {"cp866", ENC_8BIT, 866}, /* like iso-8859-5 */ 313 {"cp866", ENC_8BIT, 866}, // like iso-8859-5
314 #define IDX_CP869 43 314 #define IDX_CP869 43
315 {"cp869", ENC_8BIT, 869}, /* like iso-8859-7 */ 315 {"cp869", ENC_8BIT, 869}, // like iso-8859-7
316 #define IDX_CP874 44 316 #define IDX_CP874 44
317 {"cp874", ENC_8BIT, 874}, /* Thai */ 317 {"cp874", ENC_8BIT, 874}, // Thai
318 #define IDX_CP932 45 318 #define IDX_CP932 45
319 {"cp932", ENC_DBCS, DBCS_JPN}, 319 {"cp932", ENC_DBCS, DBCS_JPN},
320 #define IDX_CP936 46 320 #define IDX_CP936 46
321 {"cp936", ENC_DBCS, DBCS_CHS}, 321 {"cp936", ENC_DBCS, DBCS_CHS},
322 #define IDX_CP949 47 322 #define IDX_CP949 47
323 {"cp949", ENC_DBCS, DBCS_KOR}, 323 {"cp949", ENC_DBCS, DBCS_KOR},
324 #define IDX_CP950 48 324 #define IDX_CP950 48
325 {"cp950", ENC_DBCS, DBCS_CHT}, 325 {"cp950", ENC_DBCS, DBCS_CHT},
326 #define IDX_CP1250 49 326 #define IDX_CP1250 49
327 {"cp1250", ENC_8BIT, 1250}, /* Czech, Polish, etc. */ 327 {"cp1250", ENC_8BIT, 1250}, // Czech, Polish, etc.
328 #define IDX_CP1251 50 328 #define IDX_CP1251 50
329 {"cp1251", ENC_8BIT, 1251}, /* Cyrillic */ 329 {"cp1251", ENC_8BIT, 1251}, // Cyrillic
330 /* cp1252 is considered to be equal to latin1 */ 330 // cp1252 is considered to be equal to latin1
331 #define IDX_CP1253 51 331 #define IDX_CP1253 51
332 {"cp1253", ENC_8BIT, 1253}, /* Greek */ 332 {"cp1253", ENC_8BIT, 1253}, // Greek
333 #define IDX_CP1254 52 333 #define IDX_CP1254 52
334 {"cp1254", ENC_8BIT, 1254}, /* Turkish */ 334 {"cp1254", ENC_8BIT, 1254}, // Turkish
335 #define IDX_CP1255 53 335 #define IDX_CP1255 53
336 {"cp1255", ENC_8BIT, 1255}, /* Hebrew */ 336 {"cp1255", ENC_8BIT, 1255}, // Hebrew
337 #define IDX_CP1256 54 337 #define IDX_CP1256 54
338 {"cp1256", ENC_8BIT, 1256}, /* Arabic */ 338 {"cp1256", ENC_8BIT, 1256}, // Arabic
339 #define IDX_CP1257 55 339 #define IDX_CP1257 55
340 {"cp1257", ENC_8BIT, 1257}, /* Baltic */ 340 {"cp1257", ENC_8BIT, 1257}, // Baltic
341 #define IDX_CP1258 56 341 #define IDX_CP1258 56
342 {"cp1258", ENC_8BIT, 1258}, /* Vietnamese */ 342 {"cp1258", ENC_8BIT, 1258}, // Vietnamese
343 343
344 #define IDX_MACROMAN 57 344 #define IDX_MACROMAN 57
345 {"macroman", ENC_8BIT + ENC_MACROMAN, 0}, /* Mac OS */ 345 {"macroman", ENC_8BIT + ENC_MACROMAN, 0}, // Mac OS
346 #define IDX_DECMCS 58 346 #define IDX_DECMCS 58
347 {"dec-mcs", ENC_8BIT, 0}, /* DEC MCS */ 347 {"dec-mcs", ENC_8BIT, 0}, // DEC MCS
348 #define IDX_HPROMAN8 59 348 #define IDX_HPROMAN8 59
349 {"hp-roman8", ENC_8BIT, 0}, /* HP Roman8 */ 349 {"hp-roman8", ENC_8BIT, 0}, // HP Roman8
350 #define IDX_COUNT 60 350 #define IDX_COUNT 60
351 }; 351 };
352 352
353 /* 353 /*
354 * Aliases for encoding names. 354 * Aliases for encoding names.
369 {"hebrew", IDX_CP1255}, 369 {"hebrew", IDX_CP1255},
370 #else 370 #else
371 {"hebrew", IDX_ISO_8}, 371 {"hebrew", IDX_ISO_8},
372 #endif 372 #endif
373 {"latin5", IDX_ISO_9}, 373 {"latin5", IDX_ISO_9},
374 {"turkish", IDX_ISO_9}, /* ? */ 374 {"turkish", IDX_ISO_9}, // ?
375 {"latin6", IDX_ISO_10}, 375 {"latin6", IDX_ISO_10},
376 {"nordic", IDX_ISO_10}, /* ? */ 376 {"nordic", IDX_ISO_10}, // ?
377 {"thai", IDX_ISO_11}, /* ? */ 377 {"thai", IDX_ISO_11}, // ?
378 {"latin7", IDX_ISO_13}, 378 {"latin7", IDX_ISO_13},
379 {"latin8", IDX_ISO_14}, 379 {"latin8", IDX_ISO_14},
380 {"latin9", IDX_ISO_15}, 380 {"latin9", IDX_ISO_15},
381 {"utf8", IDX_UTF8}, 381 {"utf8", IDX_UTF8},
382 {"unicode", IDX_UCS2}, 382 {"unicode", IDX_UCS2},
405 {"950", IDX_CP950}, 405 {"950", IDX_CP950},
406 {"eucjp", IDX_EUC_JP}, 406 {"eucjp", IDX_EUC_JP},
407 {"unix-jis", IDX_EUC_JP}, 407 {"unix-jis", IDX_EUC_JP},
408 {"ujis", IDX_EUC_JP}, 408 {"ujis", IDX_EUC_JP},
409 {"shift-jis", IDX_SJIS}, 409 {"shift-jis", IDX_SJIS},
410 {"pck", IDX_SJIS}, /* Sun: PCK */ 410 {"pck", IDX_SJIS}, // Sun: PCK
411 {"euckr", IDX_EUC_KR}, 411 {"euckr", IDX_EUC_KR},
412 {"5601", IDX_EUC_KR}, /* Sun: KS C 5601 */ 412 {"5601", IDX_EUC_KR}, // Sun: KS C 5601
413 {"euccn", IDX_EUC_CN}, 413 {"euccn", IDX_EUC_CN},
414 {"gb2312", IDX_EUC_CN}, 414 {"gb2312", IDX_EUC_CN},
415 {"euctw", IDX_EUC_TW}, 415 {"euctw", IDX_EUC_TW},
416 #if defined(MSWIN) || defined(WIN32UNIX) || defined(MACOS_X) 416 #if defined(MSWIN) || defined(WIN32UNIX) || defined(MACOS_X)
417 {"japan", IDX_CP932}, 417 {"japan", IDX_CP932},
433 {"mac-roman", IDX_MACROMAN}, 433 {"mac-roman", IDX_MACROMAN},
434 {NULL, 0} 434 {NULL, 0}
435 }; 435 };
436 436
437 #ifndef CP_UTF8 437 #ifndef CP_UTF8
438 # define CP_UTF8 65001 /* magic number from winnls.h */ 438 # define CP_UTF8 65001 // magic number from winnls.h
439 #endif 439 #endif
440 440
441 /* 441 /*
442 * Find encoding "name" in the list of canonical encoding names. 442 * Find encoding "name" in the list of canonical encoding names.
443 * Returns -1 if not found. 443 * Returns -1 if not found.
469 #ifdef MSWIN 469 #ifdef MSWIN
470 if (name[0] == 'c' && name[1] == 'p' && VIM_ISDIGIT(name[2])) 470 if (name[0] == 'c' && name[1] == 'p' && VIM_ISDIGIT(name[2]))
471 { 471 {
472 CPINFO cpinfo; 472 CPINFO cpinfo;
473 473
474 /* Get info on this codepage to find out what it is. */ 474 // Get info on this codepage to find out what it is.
475 if (GetCPInfo(atoi((char *)name + 2), &cpinfo) != 0) 475 if (GetCPInfo(atoi((char *)name + 2), &cpinfo) != 0)
476 { 476 {
477 if (cpinfo.MaxCharSize == 1) /* some single-byte encoding */ 477 if (cpinfo.MaxCharSize == 1) // some single-byte encoding
478 return ENC_8BIT; 478 return ENC_8BIT;
479 if (cpinfo.MaxCharSize == 2 479 if (cpinfo.MaxCharSize == 2
480 && (cpinfo.LeadByte[0] != 0 || cpinfo.LeadByte[1] != 0)) 480 && (cpinfo.LeadByte[0] != 0 || cpinfo.LeadByte[1] != 0))
481 /* must be a DBCS encoding */ 481 // must be a DBCS encoding
482 return ENC_DBCS; 482 return ENC_DBCS;
483 } 483 }
484 return 0; 484 return 0;
485 } 485 }
486 #endif 486 #endif
517 char_u *p; 517 char_u *p;
518 #endif 518 #endif
519 519
520 if (p_enc == NULL) 520 if (p_enc == NULL)
521 { 521 {
522 /* Just starting up: set the whole table to one's. */ 522 // Just starting up: set the whole table to one's.
523 for (i = 0; i < 256; ++i) 523 for (i = 0; i < 256; ++i)
524 mb_bytelen_tab[i] = 1; 524 mb_bytelen_tab[i] = 1;
525 input_conv.vc_type = CONV_NONE; 525 input_conv.vc_type = CONV_NONE;
526 input_conv.vc_factor = 1; 526 input_conv.vc_factor = 1;
527 output_conv.vc_type = CONV_NONE; 527 output_conv.vc_type = CONV_NONE;
531 #ifdef MSWIN 531 #ifdef MSWIN
532 if (p_enc[0] == 'c' && p_enc[1] == 'p' && VIM_ISDIGIT(p_enc[2])) 532 if (p_enc[0] == 'c' && p_enc[1] == 'p' && VIM_ISDIGIT(p_enc[2]))
533 { 533 {
534 CPINFO cpinfo; 534 CPINFO cpinfo;
535 535
536 /* Get info on this codepage to find out what it is. */ 536 // Get info on this codepage to find out what it is.
537 if (GetCPInfo(atoi((char *)p_enc + 2), &cpinfo) != 0) 537 if (GetCPInfo(atoi((char *)p_enc + 2), &cpinfo) != 0)
538 { 538 {
539 if (cpinfo.MaxCharSize == 1) 539 if (cpinfo.MaxCharSize == 1)
540 { 540 {
541 /* some single-byte encoding */ 541 // some single-byte encoding
542 enc_unicode = 0; 542 enc_unicode = 0;
543 enc_utf8 = FALSE; 543 enc_utf8 = FALSE;
544 } 544 }
545 else if (cpinfo.MaxCharSize == 2 545 else if (cpinfo.MaxCharSize == 2
546 && (cpinfo.LeadByte[0] != 0 || cpinfo.LeadByte[1] != 0)) 546 && (cpinfo.LeadByte[0] != 0 || cpinfo.LeadByte[1] != 0))
547 { 547 {
548 /* must be a DBCS encoding, check below */ 548 // must be a DBCS encoding, check below
549 enc_dbcs_new = atoi((char *)p_enc + 2); 549 enc_dbcs_new = atoi((char *)p_enc + 2);
550 } 550 }
551 else 551 else
552 goto codepage_invalid; 552 goto codepage_invalid;
553 } 553 }
559 } 559 }
560 #endif 560 #endif
561 else if (STRNCMP(p_enc, "8bit-", 5) == 0 561 else if (STRNCMP(p_enc, "8bit-", 5) == 0
562 || STRNCMP(p_enc, "iso-8859-", 9) == 0) 562 || STRNCMP(p_enc, "iso-8859-", 9) == 0)
563 { 563 {
564 /* Accept any "8bit-" or "iso-8859-" name. */ 564 // Accept any "8bit-" or "iso-8859-" name.
565 enc_unicode = 0; 565 enc_unicode = 0;
566 enc_utf8 = FALSE; 566 enc_utf8 = FALSE;
567 } 567 }
568 else if (STRNCMP(p_enc, "2byte-", 6) == 0) 568 else if (STRNCMP(p_enc, "2byte-", 6) == 0)
569 { 569 {
570 #ifdef MSWIN 570 #ifdef MSWIN
571 /* Windows: accept only valid codepage numbers, check below. */ 571 // Windows: accept only valid codepage numbers, check below.
572 if (p_enc[6] != 'c' || p_enc[7] != 'p' 572 if (p_enc[6] != 'c' || p_enc[7] != 'p'
573 || (enc_dbcs_new = atoi((char *)p_enc + 8)) == 0) 573 || (enc_dbcs_new = atoi((char *)p_enc + 8)) == 0)
574 return e_invarg; 574 return e_invarg;
575 #else 575 #else
576 /* Unix: accept any "2byte-" name, assume current locale. */ 576 // Unix: accept any "2byte-" name, assume current locale.
577 enc_dbcs_new = DBCS_2BYTE; 577 enc_dbcs_new = DBCS_2BYTE;
578 #endif 578 #endif
579 } 579 }
580 else if ((idx = enc_canon_search(p_enc)) >= 0) 580 else if ((idx = enc_canon_search(p_enc)) >= 0)
581 { 581 {
582 i = enc_canon_table[idx].prop; 582 i = enc_canon_table[idx].prop;
583 if (i & ENC_UNICODE) 583 if (i & ENC_UNICODE)
584 { 584 {
585 /* Unicode */ 585 // Unicode
586 enc_utf8 = TRUE; 586 enc_utf8 = TRUE;
587 if (i & (ENC_2BYTE | ENC_2WORD)) 587 if (i & (ENC_2BYTE | ENC_2WORD))
588 enc_unicode = 2; 588 enc_unicode = 2;
589 else if (i & ENC_4BYTE) 589 else if (i & ENC_4BYTE)
590 enc_unicode = 4; 590 enc_unicode = 4;
591 else 591 else
592 enc_unicode = 0; 592 enc_unicode = 0;
593 } 593 }
594 else if (i & ENC_DBCS) 594 else if (i & ENC_DBCS)
595 { 595 {
596 /* 2byte, handle below */ 596 // 2byte, handle below
597 enc_dbcs_new = enc_canon_table[idx].codepage; 597 enc_dbcs_new = enc_canon_table[idx].codepage;
598 } 598 }
599 else 599 else
600 { 600 {
601 /* Must be 8-bit. */ 601 // Must be 8-bit.
602 enc_unicode = 0; 602 enc_unicode = 0;
603 enc_utf8 = FALSE; 603 enc_utf8 = FALSE;
604 } 604 }
605 } 605 }
606 else /* Don't know what encoding this is, reject it. */ 606 else // Don't know what encoding this is, reject it.
607 return e_invarg; 607 return e_invarg;
608 608
609 if (enc_dbcs_new != 0) 609 if (enc_dbcs_new != 0)
610 { 610 {
611 #ifdef MSWIN 611 #ifdef MSWIN
612 /* Check if the DBCS code page is OK. */ 612 // Check if the DBCS code page is OK.
613 if (!IsValidCodePage(enc_dbcs_new)) 613 if (!IsValidCodePage(enc_dbcs_new))
614 goto codepage_invalid; 614 goto codepage_invalid;
615 #endif 615 #endif
616 enc_unicode = 0; 616 enc_unicode = 0;
617 enc_utf8 = FALSE; 617 enc_utf8 = FALSE;
622 #if defined(MSWIN) || defined(FEAT_CYGWIN_WIN32_CLIPBOARD) 622 #if defined(MSWIN) || defined(FEAT_CYGWIN_WIN32_CLIPBOARD)
623 enc_codepage = encname2codepage(p_enc); 623 enc_codepage = encname2codepage(p_enc);
624 enc_latin9 = (STRCMP(p_enc, "iso-8859-15") == 0); 624 enc_latin9 = (STRCMP(p_enc, "iso-8859-15") == 0);
625 #endif 625 #endif
626 626
627 /* Detect an encoding that uses latin1 characters. */ 627 // Detect an encoding that uses latin1 characters.
628 enc_latin1like = (enc_utf8 || STRCMP(p_enc, "latin1") == 0 628 enc_latin1like = (enc_utf8 || STRCMP(p_enc, "latin1") == 0
629 || STRCMP(p_enc, "iso-8859-15") == 0); 629 || STRCMP(p_enc, "iso-8859-15") == 0);
630 630
631 /* 631 /*
632 * Set the function pointers. 632 * Set the function pointers.
673 673
674 /* 674 /*
675 * Fill the mb_bytelen_tab[] for MB_BYTE2LEN(). 675 * Fill the mb_bytelen_tab[] for MB_BYTE2LEN().
676 */ 676 */
677 #ifdef LEN_FROM_CONV 677 #ifdef LEN_FROM_CONV
678 /* When 'encoding' is different from the current locale mblen() won't 678 // When 'encoding' is different from the current locale mblen() won't
679 * work. Use conversion to "utf-8" instead. */ 679 // work. Use conversion to "utf-8" instead.
680 vimconv.vc_type = CONV_NONE; 680 vimconv.vc_type = CONV_NONE;
681 if (enc_dbcs) 681 if (enc_dbcs)
682 { 682 {
683 p = enc_locale(); 683 p = enc_locale();
684 if (p == NULL || STRCMP(p, p_enc) != 0) 684 if (p == NULL || STRCMP(p, p_enc) != 0)
690 } 690 }
691 #endif 691 #endif
692 692
693 for (i = 0; i < 256; ++i) 693 for (i = 0; i < 256; ++i)
694 { 694 {
695 /* Our own function to reliably check the length of UTF-8 characters, 695 // Our own function to reliably check the length of UTF-8 characters,
696 * independent of mblen(). */ 696 // independent of mblen().
697 if (enc_utf8) 697 if (enc_utf8)
698 n = utf8len_tab[i]; 698 n = utf8len_tab[i];
699 else if (enc_dbcs == 0) 699 else if (enc_dbcs == 0)
700 n = 1; 700 n = 1;
701 else 701 else
702 { 702 {
703 #if defined(MSWIN) || defined(WIN32UNIX) 703 #if defined(MSWIN) || defined(WIN32UNIX)
704 /* enc_dbcs is set by setting 'fileencoding'. It becomes a Windows 704 // enc_dbcs is set by setting 'fileencoding'. It becomes a Windows
705 * CodePage identifier, which we can pass directly in to Windows 705 // CodePage identifier, which we can pass directly in to Windows
706 * API */ 706 // API
707 n = IsDBCSLeadByteEx(enc_dbcs, (WINBYTE)i) ? 2 : 1; 707 n = IsDBCSLeadByteEx(enc_dbcs, (WINBYTE)i) ? 2 : 1;
708 #else 708 #else
709 # if defined(__amigaos4__) || defined(__ANDROID__) || \ 709 # if defined(__amigaos4__) || defined(__ANDROID__) || \
710 !(defined(HAVE_MBLEN) || defined(X_LOCALE)) 710 !(defined(HAVE_MBLEN) || defined(X_LOCALE))
711 /* 711 /*
715 */ 715 */
716 n = (i & 0x80) ? 2 : 1; 716 n = (i & 0x80) ? 2 : 1;
717 # else 717 # else
718 char buf[MB_MAXBYTES + 1]; 718 char buf[MB_MAXBYTES + 1];
719 719
720 if (i == NUL) /* just in case mblen() can't handle "" */ 720 if (i == NUL) // just in case mblen() can't handle ""
721 n = 1; 721 n = 1;
722 else 722 else
723 { 723 {
724 buf[0] = i; 724 buf[0] = i;
725 buf[1] = 0; 725 buf[1] = 0;
764 764
765 #ifdef LEN_FROM_CONV 765 #ifdef LEN_FROM_CONV
766 convert_setup(&vimconv, NULL, NULL); 766 convert_setup(&vimconv, NULL, NULL);
767 #endif 767 #endif
768 768
769 /* The cell width depends on the type of multi-byte characters. */ 769 // The cell width depends on the type of multi-byte characters.
770 (void)init_chartab(); 770 (void)init_chartab();
771 771
772 /* When enc_utf8 is set or reset, (de)allocate ScreenLinesUC[] */ 772 // When enc_utf8 is set or reset, (de)allocate ScreenLinesUC[]
773 screenalloc(FALSE); 773 screenalloc(FALSE);
774 774
775 /* When using Unicode, set default for 'fileencodings'. */ 775 // When using Unicode, set default for 'fileencodings'.
776 if (enc_utf8 && !option_was_set((char_u *)"fencs")) 776 if (enc_utf8 && !option_was_set((char_u *)"fencs"))
777 set_string_option_direct((char_u *)"fencs", -1, 777 set_string_option_direct((char_u *)"fencs", -1,
778 (char_u *)"ucs-bom,utf-8,default,latin1", OPT_FREE, 0); 778 (char_u *)"ucs-bom,utf-8,default,latin1", OPT_FREE, 0);
779 779
780 #if defined(HAVE_BIND_TEXTDOMAIN_CODESET) && defined(FEAT_GETTEXT) 780 #if defined(HAVE_BIND_TEXTDOMAIN_CODESET) && defined(FEAT_GETTEXT)
781 /* GNU gettext 0.10.37 supports this feature: set the codeset used for 781 // GNU gettext 0.10.37 supports this feature: set the codeset used for
782 * translated messages independently from the current locale. */ 782 // translated messages independently from the current locale.
783 (void)bind_textdomain_codeset(VIMPACKAGE, 783 (void)bind_textdomain_codeset(VIMPACKAGE,
784 enc_utf8 ? "utf-8" : (char *)p_enc); 784 enc_utf8 ? "utf-8" : (char *)p_enc);
785 #endif 785 #endif
786 786
787 #ifdef MSWIN 787 #ifdef MSWIN
788 /* When changing 'encoding' while starting up, then convert the command 788 // When changing 'encoding' while starting up, then convert the command
789 * line arguments from the active codepage to 'encoding'. */ 789 // line arguments from the active codepage to 'encoding'.
790 if (starting != 0) 790 if (starting != 0)
791 fix_arg_enc(); 791 fix_arg_enc();
792 #endif 792 #endif
793 793
794 /* Fire an autocommand to let people do custom font setup. This must be 794 // Fire an autocommand to let people do custom font setup. This must be
795 * after Vim has been setup for the new encoding. */ 795 // after Vim has been setup for the new encoding.
796 apply_autocmds(EVENT_ENCODINGCHANGED, NULL, (char_u *)"", FALSE, curbuf); 796 apply_autocmds(EVENT_ENCODINGCHANGED, NULL, (char_u *)"", FALSE, curbuf);
797 797
798 #ifdef FEAT_SPELL 798 #ifdef FEAT_SPELL
799 /* Need to reload spell dictionaries */ 799 // Need to reload spell dictionaries
800 spell_reload(); 800 spell_reload();
801 #endif 801 #endif
802 802
803 return NULL; 803 return NULL;
804 } 804 }
898 int 898 int
899 dbcs_class(unsigned lead, unsigned trail) 899 dbcs_class(unsigned lead, unsigned trail)
900 { 900 {
901 switch (enc_dbcs) 901 switch (enc_dbcs)
902 { 902 {
903 /* please add classify routine for your language in here */ 903 // please add classify routine for your language in here
904 904
905 case DBCS_JPNU: /* ? */ 905 case DBCS_JPNU: // ?
906 case DBCS_JPN: 906 case DBCS_JPN:
907 { 907 {
908 /* JIS code classification */ 908 // JIS code classification
909 unsigned char lb = lead; 909 unsigned char lb = lead;
910 unsigned char tb = trail; 910 unsigned char tb = trail;
911 911
912 /* convert process code to JIS */ 912 // convert process code to JIS
913 # if defined(MSWIN) || defined(WIN32UNIX) || defined(MACOS_X) 913 # if defined(MSWIN) || defined(WIN32UNIX) || defined(MACOS_X)
914 /* process code is SJIS */ 914 // process code is SJIS
915 if (lb <= 0x9f) 915 if (lb <= 0x9f)
916 lb = (lb - 0x81) * 2 + 0x21; 916 lb = (lb - 0x81) * 2 + 0x21;
917 else 917 else
918 lb = (lb - 0xc1) * 2 + 0x21; 918 lb = (lb - 0xc1) * 2 + 0x21;
919 if (tb <= 0x7e) 919 if (tb <= 0x7e)
933 * In japanese: SJIS,EUC,UNICODE,(JIS) 933 * In japanese: SJIS,EUC,UNICODE,(JIS)
934 * Note that JIS-code system don't use as 934 * Note that JIS-code system don't use as
935 * process code in most system because it uses 935 * process code in most system because it uses
936 * escape sequences(JIS is context depend encoding). 936 * escape sequences(JIS is context depend encoding).
937 */ 937 */
938 /* assume process code is JAPANESE-EUC */ 938 // assume process code is JAPANESE-EUC
939 lb &= 0x7f; 939 lb &= 0x7f;
940 tb &= 0x7f; 940 tb &= 0x7f;
941 # endif 941 # endif
942 /* exceptions */ 942 // exceptions
943 switch (lb << 8 | tb) 943 switch (lb << 8 | tb)
944 { 944 {
945 case 0x2121: /* ZENKAKU space */ 945 case 0x2121: // ZENKAKU space
946 return 0; 946 return 0;
947 case 0x2122: /* TOU-TEN (Japanese comma) */ 947 case 0x2122: // TOU-TEN (Japanese comma)
948 case 0x2123: /* KU-TEN (Japanese period) */ 948 case 0x2123: // KU-TEN (Japanese period)
949 case 0x2124: /* ZENKAKU comma */ 949 case 0x2124: // ZENKAKU comma
950 case 0x2125: /* ZENKAKU period */ 950 case 0x2125: // ZENKAKU period
951 return 1; 951 return 1;
952 case 0x213c: /* prolongedsound handled as KATAKANA */ 952 case 0x213c: // prolongedsound handled as KATAKANA
953 return 13; 953 return 13;
954 } 954 }
955 /* sieved by KU code */ 955 // sieved by KU code
956 switch (lb) 956 switch (lb)
957 { 957 {
958 case 0x21: 958 case 0x21:
959 case 0x22: 959 case 0x22:
960 /* special symbols */ 960 // special symbols
961 return 10; 961 return 10;
962 case 0x23: 962 case 0x23:
963 /* alpha-numeric */ 963 // alpha-numeric
964 return 11; 964 return 11;
965 case 0x24: 965 case 0x24:
966 /* hiragana */ 966 // hiragana
967 return 12; 967 return 12;
968 case 0x25: 968 case 0x25:
969 /* katakana */ 969 // katakana
970 return 13; 970 return 13;
971 case 0x26: 971 case 0x26:
972 /* greek */ 972 // greek
973 return 14; 973 return 14;
974 case 0x27: 974 case 0x27:
975 /* russian */ 975 // russian
976 return 15; 976 return 15;
977 case 0x28: 977 case 0x28:
978 /* lines */ 978 // lines
979 return 16; 979 return 16;
980 default: 980 default:
981 /* kanji */ 981 // kanji
982 return 17; 982 return 17;
983 } 983 }
984 } 984 }
985 985
986 case DBCS_KORU: /* ? */ 986 case DBCS_KORU: // ?
987 case DBCS_KOR: 987 case DBCS_KOR:
988 { 988 {
989 /* KS code classification */ 989 // KS code classification
990 unsigned char c1 = lead; 990 unsigned char c1 = lead;
991 unsigned char c2 = trail; 991 unsigned char c2 = trail;
992 992
993 /* 993 /*
994 * 20 : Hangul 994 * 20 : Hangul
1003 * 29 : Hiragana/Katakana 1003 * 29 : Hiragana/Katakana
1004 * 30 : Cyrillic Letter 1004 * 30 : Cyrillic Letter
1005 */ 1005 */
1006 1006
1007 if (c1 >= 0xB0 && c1 <= 0xC8) 1007 if (c1 >= 0xB0 && c1 <= 0xC8)
1008 /* Hangul */ 1008 // Hangul
1009 return 20; 1009 return 20;
1010 #if defined(MSWIN) || defined(WIN32UNIX) 1010 #if defined(MSWIN) || defined(WIN32UNIX)
1011 else if (c1 <= 0xA0 || c2 <= 0xA0) 1011 else if (c1 <= 0xA0 || c2 <= 0xA0)
1012 /* Extended Hangul Region : MS UHC(Unified Hangul Code) */ 1012 // Extended Hangul Region : MS UHC(Unified Hangul Code)
1013 /* c1: 0x81-0xA0 with c2: 0x41-0x5A, 0x61-0x7A, 0x81-0xFE 1013 // c1: 0x81-0xA0 with c2: 0x41-0x5A, 0x61-0x7A, 0x81-0xFE
1014 * c1: 0xA1-0xC6 with c2: 0x41-0x5A, 0x61-0x7A, 0x81-0xA0 1014 // c1: 0xA1-0xC6 with c2: 0x41-0x5A, 0x61-0x7A, 0x81-0xA0
1015 */
1016 return 20; 1015 return 20;
1017 #endif 1016 #endif
1018 1017
1019 else if (c1 >= 0xCA && c1 <= 0xFD) 1018 else if (c1 >= 0xCA && c1 <= 0xFD)
1020 /* Hanja */ 1019 // Hanja
1021 return 21; 1020 return 21;
1022 else switch (c1) 1021 else switch (c1)
1023 { 1022 {
1024 case 0xA1: 1023 case 0xA1:
1025 case 0xA2: 1024 case 0xA2:
1026 /* Symbols */ 1025 // Symbols
1027 return 22; 1026 return 22;
1028 case 0xA3: 1027 case 0xA3:
1029 /* Alpha-numeric */ 1028 // Alpha-numeric
1030 return 23; 1029 return 23;
1031 case 0xA4: 1030 case 0xA4:
1032 /* Hangul Letter(Alphabet) */ 1031 // Hangul Letter(Alphabet)
1033 return 24; 1032 return 24;
1034 case 0xA5: 1033 case 0xA5:
1035 /* Roman Numeral/Greek Letter */ 1034 // Roman Numeral/Greek Letter
1036 return 25; 1035 return 25;
1037 case 0xA6: 1036 case 0xA6:
1038 /* Box Drawings */ 1037 // Box Drawings
1039 return 26; 1038 return 26;
1040 case 0xA7: 1039 case 0xA7:
1041 /* Unit Symbols */ 1040 // Unit Symbols
1042 return 27; 1041 return 27;
1043 case 0xA8: 1042 case 0xA8:
1044 case 0xA9: 1043 case 0xA9:
1045 if (c2 <= 0xAF) 1044 if (c2 <= 0xAF)
1046 return 25; /* Roman Letter */ 1045 return 25; // Roman Letter
1047 else if (c2 >= 0xF6) 1046 else if (c2 >= 0xF6)
1048 return 22; /* Symbols */ 1047 return 22; // Symbols
1049 else 1048 else
1050 /* Circled/Parenthesized Letter */ 1049 // Circled/Parenthesized Letter
1051 return 28; 1050 return 28;
1052 case 0xAA: 1051 case 0xAA:
1053 case 0xAB: 1052 case 0xAB:
1054 /* Hiragana/Katakana */ 1053 // Hiragana/Katakana
1055 return 29; 1054 return 29;
1056 case 0xAC: 1055 case 0xAC:
1057 /* Cyrillic Letter */ 1056 // Cyrillic Letter
1058 return 30; 1057 return 30;
1059 } 1058 }
1060 } 1059 }
1061 default: 1060 default:
1062 break; 1061 break;
1101 { 1100 {
1102 if (c >= 0x100) 1101 if (c >= 0x100)
1103 { 1102 {
1104 buf[0] = (unsigned)c >> 8; 1103 buf[0] = (unsigned)c >> 8;
1105 buf[1] = c; 1104 buf[1] = c;
1106 /* Never use a NUL byte, it causes lots of trouble. It's an invalid 1105 // Never use a NUL byte, it causes lots of trouble. It's an invalid
1107 * character anyway. */ 1106 // character anyway.
1108 if (buf[1] == NUL) 1107 if (buf[1] == NUL)
1109 buf[1] = '\n'; 1108 buf[1] = '\n';
1110 return 2; 1109 return 2;
1111 } 1110 }
1112 buf[0] = c; 1111 buf[0] = c;
1129 dbcs_ptr2len( 1128 dbcs_ptr2len(
1130 char_u *p) 1129 char_u *p)
1131 { 1130 {
1132 int len; 1131 int len;
1133 1132
1134 /* Check if second byte is not missing. */ 1133 // Check if second byte is not missing.
1135 len = MB_BYTE2LEN(*p); 1134 len = MB_BYTE2LEN(*p);
1136 if (len == 2 && p[1] == NUL) 1135 if (len == 2 && p[1] == NUL)
1137 len = 1; 1136 len = 1;
1138 return len; 1137 return len;
1139 } 1138 }
1159 1158
1160 if (size < 1 || *p == NUL) 1159 if (size < 1 || *p == NUL)
1161 return 0; 1160 return 0;
1162 if (size == 1) 1161 if (size == 1)
1163 return 1; 1162 return 1;
1164 /* Check that second byte is not missing. */ 1163 // Check that second byte is not missing.
1165 len = MB_BYTE2LEN(*p); 1164 len = MB_BYTE2LEN(*p);
1166 if (len == 2 && p[1] == NUL) 1165 if (len == 2 && p[1] == NUL)
1167 len = 1; 1166 len = 1;
1168 return len; 1167 return len;
1169 } 1168 }
1180 static int 1179 static int
1181 intable(struct interval *table, size_t size, int c) 1180 intable(struct interval *table, size_t size, int c)
1182 { 1181 {
1183 int mid, bot, top; 1182 int mid, bot, top;
1184 1183
1185 /* first quick check for Latin1 etc. characters */ 1184 // first quick check for Latin1 etc. characters
1186 if (c < table[0].first) 1185 if (c < table[0].first)
1187 return FALSE; 1186 return FALSE;
1188 1187
1189 /* binary search in table */ 1188 // binary search in table
1190 bot = 0; 1189 bot = 0;
1191 top = (int)(size / sizeof(struct interval) - 1); 1190 top = (int)(size / sizeof(struct interval) - 1);
1192 while (top >= bot) 1191 while (top >= bot)
1193 { 1192 {
1194 mid = (bot + top) / 2; 1193 mid = (bot + top) / 2;
1200 return TRUE; 1199 return TRUE;
1201 } 1200 }
1202 return FALSE; 1201 return FALSE;
1203 } 1202 }
1204 1203
1205 /* Sorted list of non-overlapping intervals of East Asian Ambiguous 1204 // Sorted list of non-overlapping intervals of East Asian Ambiguous
1206 * characters, generated with ../runtime/tools/unicode.vim. */ 1205 // characters, generated with ../runtime/tools/unicode.vim.
1207 static struct interval ambiguous[] = 1206 static struct interval ambiguous[] =
1208 { 1207 {
1209 {0x00a1, 0x00a1}, 1208 {0x00a1, 0x00a1},
1210 {0x00a4, 0x00a4}, 1209 {0x00a4, 0x00a4},
1211 {0x00a7, 0x00a8}, 1210 {0x00a7, 0x00a8},
1408 * class 'A'(mbiguous). 1407 * class 'A'(mbiguous).
1409 */ 1408 */
1410 int 1409 int
1411 utf_char2cells(int c) 1410 utf_char2cells(int c)
1412 { 1411 {
1413 /* Sorted list of non-overlapping intervals of East Asian double width 1412 // Sorted list of non-overlapping intervals of East Asian double width
1414 * characters, generated with ../runtime/tools/unicode.vim. */ 1413 // characters, generated with ../runtime/tools/unicode.vim.
1415 static struct interval doublewidth[] = 1414 static struct interval doublewidth[] =
1416 { 1415 {
1417 {0x1100, 0x115f}, 1416 {0x1100, 0x115f},
1418 {0x231a, 0x231b}, 1417 {0x231a, 0x231b},
1419 {0x2329, 0x232a}, 1418 {0x2329, 0x232a},
1527 {0x1fa90, 0x1fa95}, 1526 {0x1fa90, 0x1fa95},
1528 {0x20000, 0x2fffd}, 1527 {0x20000, 0x2fffd},
1529 {0x30000, 0x3fffd} 1528 {0x30000, 0x3fffd}
1530 }; 1529 };
1531 1530
1532 /* Sorted list of non-overlapping intervals of Emoji characters that don't 1531 // Sorted list of non-overlapping intervals of Emoji characters that don't
1533 * have ambiguous or double width, 1532 // have ambiguous or double width,
1534 * based on http://unicode.org/emoji/charts/emoji-list.html */ 1533 // based on http://unicode.org/emoji/charts/emoji-list.html
1535 static struct interval emoji_width[] = 1534 static struct interval emoji_width[] =
1536 { 1535 {
1537 {0x1f1e6, 0x1f1ff}, 1536 {0x1f1e6, 0x1f1ff},
1538 {0x1f321, 0x1f321}, 1537 {0x1f321, 0x1f321},
1539 {0x1f324, 0x1f32c}, 1538 {0x1f324, 0x1f32c},
1583 * stuff. It should return 1 for ambiguous width chars! 1582 * stuff. It should return 1 for ambiguous width chars!
1584 */ 1583 */
1585 int n = wcwidth(c); 1584 int n = wcwidth(c);
1586 1585
1587 if (n < 0) 1586 if (n < 0)
1588 return 6; /* unprintable, displays <xxxx> */ 1587 return 6; // unprintable, displays <xxxx>
1589 if (n > 1) 1588 if (n > 1)
1590 return n; 1589 return n;
1591 #else 1590 #else
1592 if (!utf_printable(c)) 1591 if (!utf_printable(c))
1593 return 6; /* unprintable, displays <xxxx> */ 1592 return 6; // unprintable, displays <xxxx>
1594 if (intable(doublewidth, sizeof(doublewidth), c)) 1593 if (intable(doublewidth, sizeof(doublewidth), c))
1595 return 2; 1594 return 2;
1596 #endif 1595 #endif
1597 if (p_emoji && intable(emoji_width, sizeof(emoji_width), c)) 1596 if (p_emoji && intable(emoji_width, sizeof(emoji_width), c))
1598 return 2; 1597 return 2;
1599 } 1598 }
1600 1599
1601 /* Characters below 0x100 are influenced by 'isprint' option */ 1600 // Characters below 0x100 are influenced by 'isprint' option
1602 else if (c >= 0x80 && !vim_isprintc(c)) 1601 else if (c >= 0x80 && !vim_isprintc(c))
1603 return 4; /* unprintable, displays <xx> */ 1602 return 4; // unprintable, displays <xx>
1604 1603
1605 if (c >= 0x80 && *p_ambw == 'd' && intable(ambiguous, sizeof(ambiguous), c)) 1604 if (c >= 0x80 && *p_ambw == 'd' && intable(ambiguous, sizeof(ambiguous), c))
1606 return 2; 1605 return 2;
1607 1606
1608 return 1; 1607 return 1;
1623 utf_ptr2cells( 1622 utf_ptr2cells(
1624 char_u *p) 1623 char_u *p)
1625 { 1624 {
1626 int c; 1625 int c;
1627 1626
1628 /* Need to convert to a wide character. */ 1627 // Need to convert to a wide character.
1629 if (*p >= 0x80) 1628 if (*p >= 0x80)
1630 { 1629 {
1631 c = utf_ptr2char(p); 1630 c = utf_ptr2char(p);
1632 /* An illegal byte is displayed as <xx>. */ 1631 // An illegal byte is displayed as <xx>.
1633 if (utf_ptr2len(p) == 1 || c == NUL) 1632 if (utf_ptr2len(p) == 1 || c == NUL)
1634 return 4; 1633 return 4;
1635 /* If the char is ASCII it must be an overlong sequence. */ 1634 // If the char is ASCII it must be an overlong sequence.
1636 if (c < 0x80) 1635 if (c < 0x80)
1637 return char2cells(c); 1636 return char2cells(c);
1638 return utf_char2cells(c); 1637 return utf_char2cells(c);
1639 } 1638 }
1640 return 1; 1639 return 1;
1641 } 1640 }
1642 1641
1643 int 1642 int
1644 dbcs_ptr2cells(char_u *p) 1643 dbcs_ptr2cells(char_u *p)
1645 { 1644 {
1646 /* Number of cells is equal to number of bytes, except for euc-jp when 1645 // Number of cells is equal to number of bytes, except for euc-jp when
1647 * the first byte is 0x8e. */ 1646 // the first byte is 0x8e.
1648 if (enc_dbcs == DBCS_JPNU && *p == 0x8e) 1647 if (enc_dbcs == DBCS_JPNU && *p == 0x8e)
1649 return 1; 1648 return 1;
1650 return MB_BYTE2LEN(*p); 1649 return MB_BYTE2LEN(*p);
1651 } 1650 }
1652 1651
1664 static int 1663 static int
1665 utf_ptr2cells_len(char_u *p, int size) 1664 utf_ptr2cells_len(char_u *p, int size)
1666 { 1665 {
1667 int c; 1666 int c;
1668 1667
1669 /* Need to convert to a wide character. */ 1668 // Need to convert to a wide character.
1670 if (size > 0 && *p >= 0x80) 1669 if (size > 0 && *p >= 0x80)
1671 { 1670 {
1672 if (utf_ptr2len_len(p, size) < utf8len_tab[*p]) 1671 if (utf_ptr2len_len(p, size) < utf8len_tab[*p])
1673 return 1; /* truncated */ 1672 return 1; // truncated
1674 c = utf_ptr2char(p); 1673 c = utf_ptr2char(p);
1675 /* An illegal byte is displayed as <xx>. */ 1674 // An illegal byte is displayed as <xx>.
1676 if (utf_ptr2len(p) == 1 || c == NUL) 1675 if (utf_ptr2len(p) == 1 || c == NUL)
1677 return 4; 1676 return 4;
1678 /* If the char is ASCII it must be an overlong sequence. */ 1677 // If the char is ASCII it must be an overlong sequence.
1679 if (c < 0x80) 1678 if (c < 0x80)
1680 return char2cells(c); 1679 return char2cells(c);
1681 return utf_char2cells(c); 1680 return utf_char2cells(c);
1682 } 1681 }
1683 return 1; 1682 return 1;
1684 } 1683 }
1685 1684
1686 static int 1685 static int
1687 dbcs_ptr2cells_len(char_u *p, int size) 1686 dbcs_ptr2cells_len(char_u *p, int size)
1688 { 1687 {
1689 /* Number of cells is equal to number of bytes, except for euc-jp when 1688 // Number of cells is equal to number of bytes, except for euc-jp when
1690 * the first byte is 0x8e. */ 1689 // the first byte is 0x8e.
1691 if (size <= 1 || (enc_dbcs == DBCS_JPNU && *p == 0x8e)) 1690 if (size <= 1 || (enc_dbcs == DBCS_JPNU && *p == 0x8e))
1692 return 1; 1691 return 1;
1693 return MB_BYTE2LEN(*p); 1692 return MB_BYTE2LEN(*p);
1694 } 1693 }
1695 1694
1705 } 1704 }
1706 1705
1707 static int 1706 static int
1708 dbcs_char2cells(int c) 1707 dbcs_char2cells(int c)
1709 { 1708 {
1710 /* Number of cells is equal to number of bytes, except for euc-jp when 1709 // Number of cells is equal to number of bytes, except for euc-jp when
1711 * the first byte is 0x8e. */ 1710 // the first byte is 0x8e.
1712 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e) 1711 if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e)
1713 return 1; 1712 return 1;
1714 /* use the first byte */ 1713 // use the first byte
1715 return MB_BYTE2LEN((unsigned)c >> 8); 1714 return MB_BYTE2LEN((unsigned)c >> 8);
1716 } 1715 }
1717 1716
1718 /* 1717 /*
1719 * Return the number of cells occupied by string "p". 1718 * Return the number of cells occupied by string "p".
1742 } 1741 }
1743 1742
1744 int 1743 int
1745 dbcs_off2cells(unsigned off, unsigned max_off) 1744 dbcs_off2cells(unsigned off, unsigned max_off)
1746 { 1745 {
1747 /* never check beyond end of the line */ 1746 // never check beyond end of the line
1748 if (off >= max_off) 1747 if (off >= max_off)
1749 return 1; 1748 return 1;
1750 1749
1751 /* Number of cells is equal to number of bytes, except for euc-jp when 1750 // Number of cells is equal to number of bytes, except for euc-jp when
1752 * the first byte is 0x8e. */ 1751 // the first byte is 0x8e.
1753 if (enc_dbcs == DBCS_JPNU && ScreenLines[off] == 0x8e) 1752 if (enc_dbcs == DBCS_JPNU && ScreenLines[off] == 0x8e)
1754 return 1; 1753 return 1;
1755 return MB_BYTE2LEN(ScreenLines[off]); 1754 return MB_BYTE2LEN(ScreenLines[off]);
1756 } 1755 }
1757 1756
1789 int 1788 int
1790 utf_ptr2char(char_u *p) 1789 utf_ptr2char(char_u *p)
1791 { 1790 {
1792 int len; 1791 int len;
1793 1792
1794 if (p[0] < 0x80) /* be quick for ASCII */ 1793 if (p[0] < 0x80) // be quick for ASCII
1795 return p[0]; 1794 return p[0];
1796 1795
1797 len = utf8len_tab_zero[p[0]]; 1796 len = utf8len_tab_zero[p[0]];
1798 if (len > 1 && (p[1] & 0xc0) == 0x80) 1797 if (len > 1 && (p[1] & 0xc0) == 0x80)
1799 { 1798 {
1821 + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f); 1820 + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f);
1822 } 1821 }
1823 } 1822 }
1824 } 1823 }
1825 } 1824 }
1826 /* Illegal value, just return the first byte */ 1825 // Illegal value, just return the first byte
1827 return p[0]; 1826 return p[0];
1828 } 1827 }
1829 1828
1830 /* 1829 /*
1831 * Convert a UTF-8 byte sequence to a wide character. 1830 * Convert a UTF-8 byte sequence to a wide character.
1846 static int 1845 static int
1847 utf_safe_read_char_adv(char_u **s, size_t *n) 1846 utf_safe_read_char_adv(char_u **s, size_t *n)
1848 { 1847 {
1849 int c, k; 1848 int c, k;
1850 1849
1851 if (*n == 0) /* end of buffer */ 1850 if (*n == 0) // end of buffer
1852 return 0; 1851 return 0;
1853 1852
1854 k = utf8len_tab_zero[**s]; 1853 k = utf8len_tab_zero[**s];
1855 1854
1856 if (k == 1) 1855 if (k == 1)
1857 { 1856 {
1858 /* ASCII character or NUL */ 1857 // ASCII character or NUL
1859 (*n)--; 1858 (*n)--;
1860 return *(*s)++; 1859 return *(*s)++;
1861 } 1860 }
1862 1861
1863 if ((size_t)k <= *n) 1862 if ((size_t)k <= *n)
1864 { 1863 {
1865 /* We have a multibyte sequence and it isn't truncated by buffer 1864 // We have a multibyte sequence and it isn't truncated by buffer
1866 * limits so utf_ptr2char() is safe to use. Or the first byte is 1865 // limits so utf_ptr2char() is safe to use. Or the first byte is
1867 * illegal (k=0), and it's also safe to use utf_ptr2char(). */ 1866 // illegal (k=0), and it's also safe to use utf_ptr2char().
1868 c = utf_ptr2char(*s); 1867 c = utf_ptr2char(*s);
1869 1868
1870 /* On failure, utf_ptr2char() returns the first byte, so here we 1869 // On failure, utf_ptr2char() returns the first byte, so here we
1871 * check equality with the first byte. The only non-ASCII character 1870 // check equality with the first byte. The only non-ASCII character
1872 * which equals the first byte of its own UTF-8 representation is 1871 // which equals the first byte of its own UTF-8 representation is
1873 * U+00C3 (UTF-8: 0xC3 0x83), so need to check that special case too. 1872 // U+00C3 (UTF-8: 0xC3 0x83), so need to check that special case too.
1874 * It's safe even if n=1, else we would have k=2 > n. */ 1873 // It's safe even if n=1, else we would have k=2 > n.
1875 if (c != (int)(**s) || (c == 0xC3 && (*s)[1] == 0x83)) 1874 if (c != (int)(**s) || (c == 0xC3 && (*s)[1] == 0x83))
1876 { 1875 {
1877 /* byte sequence was successfully decoded */ 1876 // byte sequence was successfully decoded
1878 *s += k; 1877 *s += k;
1879 *n -= k; 1878 *n -= k;
1880 return c; 1879 return c;
1881 } 1880 }
1882 } 1881 }
1883 1882
1884 /* byte sequence is incomplete or illegal */ 1883 // byte sequence is incomplete or illegal
1885 return -1; 1884 return -1;
1886 } 1885 }
1887 1886
1888 /* 1887 /*
1889 * Get character at **pp and advance *pp to the next character. 1888 * Get character at **pp and advance *pp to the next character.
1941 * composing characters. 1940 * composing characters.
1942 */ 1941 */
1943 int 1942 int
1944 utfc_ptr2char( 1943 utfc_ptr2char(
1945 char_u *p, 1944 char_u *p,
1946 int *pcc) /* return: composing chars, last one is 0 */ 1945 int *pcc) // return: composing chars, last one is 0
1947 { 1946 {
1948 int len; 1947 int len;
1949 int c; 1948 int c;
1950 int cc; 1949 int cc;
1951 int i = 0; 1950 int i = 0;
1952 1951
1953 c = utf_ptr2char(p); 1952 c = utf_ptr2char(p);
1954 len = utf_ptr2len(p); 1953 len = utf_ptr2len(p);
1955 1954
1956 /* Only accept a composing char when the first char isn't illegal. */ 1955 // Only accept a composing char when the first char isn't illegal.
1957 if ((len > 1 || *p < 0x80) 1956 if ((len > 1 || *p < 0x80)
1958 && p[len] >= 0x80 1957 && p[len] >= 0x80
1959 && UTF_COMPOSINGLIKE(p, p + len)) 1958 && UTF_COMPOSINGLIKE(p, p + len))
1960 { 1959 {
1961 cc = utf_ptr2char(p + len); 1960 cc = utf_ptr2char(p + len);
1968 if (p[len] < 0x80 || !utf_iscomposing(cc = utf_ptr2char(p + len))) 1967 if (p[len] < 0x80 || !utf_iscomposing(cc = utf_ptr2char(p + len)))
1969 break; 1968 break;
1970 } 1969 }
1971 } 1970 }
1972 1971
1973 if (i < MAX_MCO) /* last composing char must be 0 */ 1972 if (i < MAX_MCO) // last composing char must be 0
1974 pcc[i] = 0; 1973 pcc[i] = 0;
1975 1974
1976 return c; 1975 return c;
1977 } 1976 }
1978 1977
1981 * composing characters. Use no more than p[maxlen]. 1980 * composing characters. Use no more than p[maxlen].
1982 */ 1981 */
1983 int 1982 int
1984 utfc_ptr2char_len( 1983 utfc_ptr2char_len(
1985 char_u *p, 1984 char_u *p,
1986 int *pcc, /* return: composing chars, last one is 0 */ 1985 int *pcc, // return: composing chars, last one is 0
1987 int maxlen) 1986 int maxlen)
1988 { 1987 {
1989 int len; 1988 int len;
1990 int c; 1989 int c;
1991 int cc; 1990 int cc;
1992 int i = 0; 1991 int i = 0;
1993 1992
1994 c = utf_ptr2char(p); 1993 c = utf_ptr2char(p);
1995 len = utf_ptr2len_len(p, maxlen); 1994 len = utf_ptr2len_len(p, maxlen);
1996 /* Only accept a composing char when the first char isn't illegal. */ 1995 // Only accept a composing char when the first char isn't illegal.
1997 if ((len > 1 || *p < 0x80) 1996 if ((len > 1 || *p < 0x80)
1998 && len < maxlen 1997 && len < maxlen
1999 && p[len] >= 0x80 1998 && p[len] >= 0x80
2000 && UTF_COMPOSINGLIKE(p, p + len)) 1999 && UTF_COMPOSINGLIKE(p, p + len))
2001 { 2000 {
2011 || !utf_iscomposing(cc = utf_ptr2char(p + len))) 2010 || !utf_iscomposing(cc = utf_ptr2char(p + len)))
2012 break; 2011 break;
2013 } 2012 }
2014 } 2013 }
2015 2014
2016 if (i < MAX_MCO) /* last composing char must be 0 */ 2015 if (i < MAX_MCO) // last composing char must be 0
2017 pcc[i] = 0; 2016 pcc[i] = 0;
2018 2017
2019 return c; 2018 return c;
2020 } 2019 }
2021 2020
2089 int i; 2088 int i;
2090 int m; 2089 int m;
2091 2090
2092 len = utf8len_tab[*p]; 2091 len = utf8len_tab[*p];
2093 if (len == 1) 2092 if (len == 1)
2094 return 1; /* NUL, ascii or illegal lead byte */ 2093 return 1; // NUL, ascii or illegal lead byte
2095 if (len > size) 2094 if (len > size)
2096 m = size; /* incomplete byte sequence. */ 2095 m = size; // incomplete byte sequence.
2097 else 2096 else
2098 m = len; 2097 m = len;
2099 for (i = 1; i < m; ++i) 2098 for (i = 1; i < m; ++i)
2100 if ((p[i] & 0xc0) != 0x80) 2099 if ((p[i] & 0xc0) != 0x80)
2101 return 1; 2100 return 1;
2115 int prevlen; 2114 int prevlen;
2116 #endif 2115 #endif
2117 2116
2118 if (b0 == NUL) 2117 if (b0 == NUL)
2119 return 0; 2118 return 0;
2120 if (b0 < 0x80 && p[1] < 0x80) /* be quick for ASCII */ 2119 if (b0 < 0x80 && p[1] < 0x80) // be quick for ASCII
2121 return 1; 2120 return 1;
2122 2121
2123 /* Skip over first UTF-8 char, stopping at a NUL byte. */ 2122 // Skip over first UTF-8 char, stopping at a NUL byte.
2124 len = utf_ptr2len(p); 2123 len = utf_ptr2len(p);
2125 2124
2126 /* Check for illegal byte. */ 2125 // Check for illegal byte.
2127 if (len == 1 && b0 >= 0x80) 2126 if (len == 1 && b0 >= 0x80)
2128 return 1; 2127 return 1;
2129 2128
2130 /* 2129 /*
2131 * Check for composing characters. We can handle only the first six, but 2130 * Check for composing characters. We can handle only the first six, but
2137 for (;;) 2136 for (;;)
2138 { 2137 {
2139 if (p[len] < 0x80 || !UTF_COMPOSINGLIKE(p + prevlen, p + len)) 2138 if (p[len] < 0x80 || !UTF_COMPOSINGLIKE(p + prevlen, p + len))
2140 return len; 2139 return len;
2141 2140
2142 /* Skip over composing char */ 2141 // Skip over composing char
2143 #ifdef FEAT_ARABIC 2142 #ifdef FEAT_ARABIC
2144 prevlen = len; 2143 prevlen = len;
2145 #endif 2144 #endif
2146 len += utf_ptr2len(p + len); 2145 len += utf_ptr2len(p + len);
2147 } 2146 }
2161 int prevlen; 2160 int prevlen;
2162 #endif 2161 #endif
2163 2162
2164 if (size < 1 || *p == NUL) 2163 if (size < 1 || *p == NUL)
2165 return 0; 2164 return 0;
2166 if (p[0] < 0x80 && (size == 1 || p[1] < 0x80)) /* be quick for ASCII */ 2165 if (p[0] < 0x80 && (size == 1 || p[1] < 0x80)) // be quick for ASCII
2167 return 1; 2166 return 1;
2168 2167
2169 /* Skip over first UTF-8 char, stopping at a NUL byte. */ 2168 // Skip over first UTF-8 char, stopping at a NUL byte.
2170 len = utf_ptr2len_len(p, size); 2169 len = utf_ptr2len_len(p, size);
2171 2170
2172 /* Check for illegal byte and incomplete byte sequence. */ 2171 // Check for illegal byte and incomplete byte sequence.
2173 if ((len == 1 && p[0] >= 0x80) || len > size) 2172 if ((len == 1 && p[0] >= 0x80) || len > size)
2174 return 1; 2173 return 1;
2175 2174
2176 /* 2175 /*
2177 * Check for composing characters. We can handle only the first six, but 2176 * Check for composing characters. We can handle only the first six, but
2196 break; 2195 break;
2197 2196
2198 if (!UTF_COMPOSINGLIKE(p + prevlen, p + len)) 2197 if (!UTF_COMPOSINGLIKE(p + prevlen, p + len))
2199 break; 2198 break;
2200 2199
2201 /* Skip over composing char */ 2200 // Skip over composing char
2202 #ifdef FEAT_ARABIC 2201 #ifdef FEAT_ARABIC
2203 prevlen = len; 2202 prevlen = len;
2204 #endif 2203 #endif
2205 len += len_next_char; 2204 len += len_next_char;
2206 } 2205 }
2232 * Returns the number of bytes. 2231 * Returns the number of bytes.
2233 */ 2232 */
2234 int 2233 int
2235 utf_char2bytes(int c, char_u *buf) 2234 utf_char2bytes(int c, char_u *buf)
2236 { 2235 {
2237 if (c < 0x80) /* 7 bits */ 2236 if (c < 0x80) // 7 bits
2238 { 2237 {
2239 buf[0] = c; 2238 buf[0] = c;
2240 return 1; 2239 return 1;
2241 } 2240 }
2242 if (c < 0x800) /* 11 bits */ 2241 if (c < 0x800) // 11 bits
2243 { 2242 {
2244 buf[0] = 0xc0 + ((unsigned)c >> 6); 2243 buf[0] = 0xc0 + ((unsigned)c >> 6);
2245 buf[1] = 0x80 + (c & 0x3f); 2244 buf[1] = 0x80 + (c & 0x3f);
2246 return 2; 2245 return 2;
2247 } 2246 }
2248 if (c < 0x10000) /* 16 bits */ 2247 if (c < 0x10000) // 16 bits
2249 { 2248 {
2250 buf[0] = 0xe0 + ((unsigned)c >> 12); 2249 buf[0] = 0xe0 + ((unsigned)c >> 12);
2251 buf[1] = 0x80 + (((unsigned)c >> 6) & 0x3f); 2250 buf[1] = 0x80 + (((unsigned)c >> 6) & 0x3f);
2252 buf[2] = 0x80 + (c & 0x3f); 2251 buf[2] = 0x80 + (c & 0x3f);
2253 return 3; 2252 return 3;
2254 } 2253 }
2255 if (c < 0x200000) /* 21 bits */ 2254 if (c < 0x200000) // 21 bits
2256 { 2255 {
2257 buf[0] = 0xf0 + ((unsigned)c >> 18); 2256 buf[0] = 0xf0 + ((unsigned)c >> 18);
2258 buf[1] = 0x80 + (((unsigned)c >> 12) & 0x3f); 2257 buf[1] = 0x80 + (((unsigned)c >> 12) & 0x3f);
2259 buf[2] = 0x80 + (((unsigned)c >> 6) & 0x3f); 2258 buf[2] = 0x80 + (((unsigned)c >> 6) & 0x3f);
2260 buf[3] = 0x80 + (c & 0x3f); 2259 buf[3] = 0x80 + (c & 0x3f);
2261 return 4; 2260 return 4;
2262 } 2261 }
2263 if (c < 0x4000000) /* 26 bits */ 2262 if (c < 0x4000000) // 26 bits
2264 { 2263 {
2265 buf[0] = 0xf8 + ((unsigned)c >> 24); 2264 buf[0] = 0xf8 + ((unsigned)c >> 24);
2266 buf[1] = 0x80 + (((unsigned)c >> 18) & 0x3f); 2265 buf[1] = 0x80 + (((unsigned)c >> 18) & 0x3f);
2267 buf[2] = 0x80 + (((unsigned)c >> 12) & 0x3f); 2266 buf[2] = 0x80 + (((unsigned)c >> 12) & 0x3f);
2268 buf[3] = 0x80 + (((unsigned)c >> 6) & 0x3f); 2267 buf[3] = 0x80 + (((unsigned)c >> 6) & 0x3f);
2269 buf[4] = 0x80 + (c & 0x3f); 2268 buf[4] = 0x80 + (c & 0x3f);
2270 return 5; 2269 return 5;
2271 } 2270 }
2272 /* 31 bits */ 2271 // 31 bits
2273 buf[0] = 0xfc + ((unsigned)c >> 30); 2272 buf[0] = 0xfc + ((unsigned)c >> 30);
2274 buf[1] = 0x80 + (((unsigned)c >> 24) & 0x3f); 2273 buf[1] = 0x80 + (((unsigned)c >> 24) & 0x3f);
2275 buf[2] = 0x80 + (((unsigned)c >> 18) & 0x3f); 2274 buf[2] = 0x80 + (((unsigned)c >> 18) & 0x3f);
2276 buf[3] = 0x80 + (((unsigned)c >> 12) & 0x3f); 2275 buf[3] = 0x80 + (((unsigned)c >> 12) & 0x3f);
2277 buf[4] = 0x80 + (((unsigned)c >> 6) & 0x3f); 2276 buf[4] = 0x80 + (((unsigned)c >> 6) & 0x3f);
2296 * Based on code from Markus Kuhn. 2295 * Based on code from Markus Kuhn.
2297 */ 2296 */
2298 int 2297 int
2299 utf_iscomposing(int c) 2298 utf_iscomposing(int c)
2300 { 2299 {
2301 /* Sorted list of non-overlapping intervals. 2300 // Sorted list of non-overlapping intervals.
2302 * Generated by ../runtime/tools/unicode.vim. */ 2301 // Generated by ../runtime/tools/unicode.vim.
2303 static struct interval combining[] = 2302 static struct interval combining[] =
2304 { 2303 {
2305 {0x0300, 0x036f}, 2304 {0x0300, 0x036f},
2306 {0x0483, 0x0489}, 2305 {0x0483, 0x0489},
2307 {0x0591, 0x05bd}, 2306 {0x0591, 0x05bd},
2598 /* 2597 /*
2599 * Assume the iswprint() library function works better than our own stuff. 2598 * Assume the iswprint() library function works better than our own stuff.
2600 */ 2599 */
2601 return iswprint(c); 2600 return iswprint(c);
2602 #else 2601 #else
2603 /* Sorted list of non-overlapping intervals. 2602 // Sorted list of non-overlapping intervals.
2604 * 0xd800-0xdfff is reserved for UTF-16, actually illegal. */ 2603 // 0xd800-0xdfff is reserved for UTF-16, actually illegal.
2605 static struct interval nonprint[] = 2604 static struct interval nonprint[] =
2606 { 2605 {
2607 {0x070f, 0x070f}, {0x180b, 0x180e}, {0x200b, 0x200f}, {0x202a, 0x202e}, 2606 {0x070f, 0x070f}, {0x180b, 0x180e}, {0x200b, 0x200f}, {0x202a, 0x202e},
2608 {0x206a, 0x206f}, {0xd800, 0xdfff}, {0xfeff, 0xfeff}, {0xfff9, 0xfffb}, 2607 {0x206a, 0x206f}, {0xd800, 0xdfff}, {0xfeff, 0xfeff}, {0xfff9, 0xfffb},
2609 {0xfffe, 0xffff} 2608 {0xfffe, 0xffff}
2611 2610
2612 return !intable(nonprint, sizeof(nonprint), c); 2611 return !intable(nonprint, sizeof(nonprint), c);
2613 #endif 2612 #endif
2614 } 2613 }
2615 2614
2616 /* Sorted list of non-overlapping intervals of all Emoji characters, 2615 // Sorted list of non-overlapping intervals of all Emoji characters,
2617 * based on http://unicode.org/emoji/charts/emoji-list.html */ 2616 // based on http://unicode.org/emoji/charts/emoji-list.html
2618 static struct interval emoji_all[] = 2617 static struct interval emoji_all[] =
2619 { 2618 {
2620 {0x203c, 0x203c}, 2619 {0x203c, 0x203c},
2621 {0x2049, 0x2049}, 2620 {0x2049, 0x2049},
2622 {0x2122, 0x2122}, 2621 {0x2122, 0x2122},
2774 } 2773 }
2775 2774
2776 int 2775 int
2777 utf_class_buf(int c, buf_T *buf) 2776 utf_class_buf(int c, buf_T *buf)
2778 { 2777 {
2779 /* sorted list of non-overlapping intervals */ 2778 // sorted list of non-overlapping intervals
2780 static struct clinterval 2779 static struct clinterval
2781 { 2780 {
2782 unsigned int first; 2781 unsigned int first;
2783 unsigned int last; 2782 unsigned int last;
2784 unsigned int class; 2783 unsigned int class;
2785 } classes[] = 2784 } classes[] =
2786 { 2785 {
2787 {0x037e, 0x037e, 1}, /* Greek question mark */ 2786 {0x037e, 0x037e, 1}, // Greek question mark
2788 {0x0387, 0x0387, 1}, /* Greek ano teleia */ 2787 {0x0387, 0x0387, 1}, // Greek ano teleia
2789 {0x055a, 0x055f, 1}, /* Armenian punctuation */ 2788 {0x055a, 0x055f, 1}, // Armenian punctuation
2790 {0x0589, 0x0589, 1}, /* Armenian full stop */ 2789 {0x0589, 0x0589, 1}, // Armenian full stop
2791 {0x05be, 0x05be, 1}, 2790 {0x05be, 0x05be, 1},
2792 {0x05c0, 0x05c0, 1}, 2791 {0x05c0, 0x05c0, 1},
2793 {0x05c3, 0x05c3, 1}, 2792 {0x05c3, 0x05c3, 1},
2794 {0x05f3, 0x05f4, 1}, 2793 {0x05f3, 0x05f4, 1},
2795 {0x060c, 0x060c, 1}, 2794 {0x060c, 0x060c, 1},
2796 {0x061b, 0x061b, 1}, 2795 {0x061b, 0x061b, 1},
2797 {0x061f, 0x061f, 1}, 2796 {0x061f, 0x061f, 1},
2798 {0x066a, 0x066d, 1}, 2797 {0x066a, 0x066d, 1},
2799 {0x06d4, 0x06d4, 1}, 2798 {0x06d4, 0x06d4, 1},
2800 {0x0700, 0x070d, 1}, /* Syriac punctuation */ 2799 {0x0700, 0x070d, 1}, // Syriac punctuation
2801 {0x0964, 0x0965, 1}, 2800 {0x0964, 0x0965, 1},
2802 {0x0970, 0x0970, 1}, 2801 {0x0970, 0x0970, 1},
2803 {0x0df4, 0x0df4, 1}, 2802 {0x0df4, 0x0df4, 1},
2804 {0x0e4f, 0x0e4f, 1}, 2803 {0x0e4f, 0x0e4f, 1},
2805 {0x0e5a, 0x0e5b, 1}, 2804 {0x0e5a, 0x0e5b, 1},
2806 {0x0f04, 0x0f12, 1}, 2805 {0x0f04, 0x0f12, 1},
2807 {0x0f3a, 0x0f3d, 1}, 2806 {0x0f3a, 0x0f3d, 1},
2808 {0x0f85, 0x0f85, 1}, 2807 {0x0f85, 0x0f85, 1},
2809 {0x104a, 0x104f, 1}, /* Myanmar punctuation */ 2808 {0x104a, 0x104f, 1}, // Myanmar punctuation
2810 {0x10fb, 0x10fb, 1}, /* Georgian punctuation */ 2809 {0x10fb, 0x10fb, 1}, // Georgian punctuation
2811 {0x1361, 0x1368, 1}, /* Ethiopic punctuation */ 2810 {0x1361, 0x1368, 1}, // Ethiopic punctuation
2812 {0x166d, 0x166e, 1}, /* Canadian Syl. punctuation */ 2811 {0x166d, 0x166e, 1}, // Canadian Syl. punctuation
2813 {0x1680, 0x1680, 0}, 2812 {0x1680, 0x1680, 0},
2814 {0x169b, 0x169c, 1}, 2813 {0x169b, 0x169c, 1},
2815 {0x16eb, 0x16ed, 1}, 2814 {0x16eb, 0x16ed, 1},
2816 {0x1735, 0x1736, 1}, 2815 {0x1735, 0x1736, 1},
2817 {0x17d4, 0x17dc, 1}, /* Khmer punctuation */ 2816 {0x17d4, 0x17dc, 1}, // Khmer punctuation
2818 {0x1800, 0x180a, 1}, /* Mongolian punctuation */ 2817 {0x1800, 0x180a, 1}, // Mongolian punctuation
2819 {0x2000, 0x200b, 0}, /* spaces */ 2818 {0x2000, 0x200b, 0}, // spaces
2820 {0x200c, 0x2027, 1}, /* punctuation and symbols */ 2819 {0x200c, 0x2027, 1}, // punctuation and symbols
2821 {0x2028, 0x2029, 0}, 2820 {0x2028, 0x2029, 0},
2822 {0x202a, 0x202e, 1}, /* punctuation and symbols */ 2821 {0x202a, 0x202e, 1}, // punctuation and symbols
2823 {0x202f, 0x202f, 0}, 2822 {0x202f, 0x202f, 0},
2824 {0x2030, 0x205e, 1}, /* punctuation and symbols */ 2823 {0x2030, 0x205e, 1}, // punctuation and symbols
2825 {0x205f, 0x205f, 0}, 2824 {0x205f, 0x205f, 0},
2826 {0x2060, 0x27ff, 1}, /* punctuation and symbols */ 2825 {0x2060, 0x27ff, 1}, // punctuation and symbols
2827 {0x2070, 0x207f, 0x2070}, /* superscript */ 2826 {0x2070, 0x207f, 0x2070}, // superscript
2828 {0x2080, 0x2094, 0x2080}, /* subscript */ 2827 {0x2080, 0x2094, 0x2080}, // subscript
2829 {0x20a0, 0x27ff, 1}, /* all kinds of symbols */ 2828 {0x20a0, 0x27ff, 1}, // all kinds of symbols
2830 {0x2800, 0x28ff, 0x2800}, /* braille */ 2829 {0x2800, 0x28ff, 0x2800}, // braille
2831 {0x2900, 0x2998, 1}, /* arrows, brackets, etc. */ 2830 {0x2900, 0x2998, 1}, // arrows, brackets, etc.
2832 {0x29d8, 0x29db, 1}, 2831 {0x29d8, 0x29db, 1},
2833 {0x29fc, 0x29fd, 1}, 2832 {0x29fc, 0x29fd, 1},
2834 {0x2e00, 0x2e7f, 1}, /* supplemental punctuation */ 2833 {0x2e00, 0x2e7f, 1}, // supplemental punctuation
2835 {0x3000, 0x3000, 0}, /* ideographic space */ 2834 {0x3000, 0x3000, 0}, // ideographic space
2836 {0x3001, 0x3020, 1}, /* ideographic punctuation */ 2835 {0x3001, 0x3020, 1}, // ideographic punctuation
2837 {0x3030, 0x3030, 1}, 2836 {0x3030, 0x3030, 1},
2838 {0x303d, 0x303d, 1}, 2837 {0x303d, 0x303d, 1},
2839 {0x3040, 0x309f, 0x3040}, /* Hiragana */ 2838 {0x3040, 0x309f, 0x3040}, // Hiragana
2840 {0x30a0, 0x30ff, 0x30a0}, /* Katakana */ 2839 {0x30a0, 0x30ff, 0x30a0}, // Katakana
2841 {0x3300, 0x9fff, 0x4e00}, /* CJK Ideographs */ 2840 {0x3300, 0x9fff, 0x4e00}, // CJK Ideographs
2842 {0xac00, 0xd7a3, 0xac00}, /* Hangul Syllables */ 2841 {0xac00, 0xd7a3, 0xac00}, // Hangul Syllables
2843 {0xf900, 0xfaff, 0x4e00}, /* CJK Ideographs */ 2842 {0xf900, 0xfaff, 0x4e00}, // CJK Ideographs
2844 {0xfd3e, 0xfd3f, 1}, 2843 {0xfd3e, 0xfd3f, 1},
2845 {0xfe30, 0xfe6b, 1}, /* punctuation forms */ 2844 {0xfe30, 0xfe6b, 1}, // punctuation forms
2846 {0xff00, 0xff0f, 1}, /* half/fullwidth ASCII */ 2845 {0xff00, 0xff0f, 1}, // half/fullwidth ASCII
2847 {0xff1a, 0xff20, 1}, /* half/fullwidth ASCII */ 2846 {0xff1a, 0xff20, 1}, // half/fullwidth ASCII
2848 {0xff3b, 0xff40, 1}, /* half/fullwidth ASCII */ 2847 {0xff3b, 0xff40, 1}, // half/fullwidth ASCII
2849 {0xff5b, 0xff65, 1}, /* half/fullwidth ASCII */ 2848 {0xff5b, 0xff65, 1}, // half/fullwidth ASCII
2850 {0x1d000, 0x1d24f, 1}, /* Musical notation */ 2849 {0x1d000, 0x1d24f, 1}, // Musical notation
2851 {0x1d400, 0x1d7ff, 1}, /* Mathematical Alphanumeric Symbols */ 2850 {0x1d400, 0x1d7ff, 1}, // Mathematical Alphanumeric Symbols
2852 {0x1f000, 0x1f2ff, 1}, /* Game pieces; enclosed characters */ 2851 {0x1f000, 0x1f2ff, 1}, // Game pieces; enclosed characters
2853 {0x1f300, 0x1f9ff, 1}, /* Many symbol blocks */ 2852 {0x1f300, 0x1f9ff, 1}, // Many symbol blocks
2854 {0x20000, 0x2a6df, 0x4e00}, /* CJK Ideographs */ 2853 {0x20000, 0x2a6df, 0x4e00}, // CJK Ideographs
2855 {0x2a700, 0x2b73f, 0x4e00}, /* CJK Ideographs */ 2854 {0x2a700, 0x2b73f, 0x4e00}, // CJK Ideographs
2856 {0x2b740, 0x2b81f, 0x4e00}, /* CJK Ideographs */ 2855 {0x2b740, 0x2b81f, 0x4e00}, // CJK Ideographs
2857 {0x2f800, 0x2fa1f, 0x4e00}, /* CJK Ideographs */ 2856 {0x2f800, 0x2fa1f, 0x4e00}, // CJK Ideographs
2858 }; 2857 };
2859 2858
2860 int bot = 0; 2859 int bot = 0;
2861 int top = sizeof(classes) / sizeof(struct clinterval) - 1; 2860 int top = sizeof(classes) / sizeof(struct clinterval) - 1;
2862 int mid; 2861 int mid;
2863 2862
2864 /* First quick check for Latin1 characters, use 'iskeyword'. */ 2863 // First quick check for Latin1 characters, use 'iskeyword'.
2865 if (c < 0x100) 2864 if (c < 0x100)
2866 { 2865 {
2867 if (c == ' ' || c == '\t' || c == NUL || c == 0xa0) 2866 if (c == ' ' || c == '\t' || c == NUL || c == 0xa0)
2868 return 0; /* blank */ 2867 return 0; // blank
2869 if (vim_iswordc_buf(c, buf)) 2868 if (vim_iswordc_buf(c, buf))
2870 return 2; /* word character */ 2869 return 2; // word character
2871 return 1; /* punctuation */ 2870 return 1; // punctuation
2872 } 2871 }
2873 2872
2874 /* binary search in table */ 2873 // binary search in table
2875 while (top >= bot) 2874 while (top >= bot)
2876 { 2875 {
2877 mid = (bot + top) / 2; 2876 mid = (bot + top) / 2;
2878 if (classes[mid].last < (unsigned int)c) 2877 if (classes[mid].last < (unsigned int)c)
2879 bot = mid + 1; 2878 bot = mid + 1;
2881 top = mid - 1; 2880 top = mid - 1;
2882 else 2881 else
2883 return (int)classes[mid].class; 2882 return (int)classes[mid].class;
2884 } 2883 }
2885 2884
2886 /* emoji */ 2885 // emoji
2887 if (intable(emoji_all, sizeof(emoji_all), c)) 2886 if (intable(emoji_all, sizeof(emoji_all), c))
2888 return 3; 2887 return 3;
2889 2888
2890 /* most other characters are "word" characters */ 2889 // most other characters are "word" characters
2891 return 2; 2890 return 2;
2892 } 2891 }
2893 2892
2894 int 2893 int
2895 utf_ambiguous_width(int c) 2894 utf_ambiguous_width(int c)
3125 utf_convert( 3124 utf_convert(
3126 int a, 3125 int a,
3127 convertStruct table[], 3126 convertStruct table[],
3128 int tableSize) 3127 int tableSize)
3129 { 3128 {
3130 int start, mid, end; /* indices into table */ 3129 int start, mid, end; // indices into table
3131 int entries = tableSize / sizeof(convertStruct); 3130 int entries = tableSize / sizeof(convertStruct);
3132 3131
3133 start = 0; 3132 start = 0;
3134 end = entries; 3133 end = entries;
3135 while (start < end) 3134 while (start < end)
3136 { 3135 {
3137 /* need to search further */ 3136 // need to search further
3138 mid = (end + start) / 2; 3137 mid = (end + start) / 2;
3139 if (table[mid].rangeEnd < a) 3138 if (table[mid].rangeEnd < a)
3140 start = mid + 1; 3139 start = mid + 1;
3141 else 3140 else
3142 end = mid; 3141 end = mid;
3156 */ 3155 */
3157 int 3156 int
3158 utf_fold(int a) 3157 utf_fold(int a)
3159 { 3158 {
3160 if (a < 0x80) 3159 if (a < 0x80)
3161 /* be fast for ASCII */ 3160 // be fast for ASCII
3162 return a >= 0x41 && a <= 0x5a ? a + 32 : a; 3161 return a >= 0x41 && a <= 0x5a ? a + 32 : a;
3163 return utf_convert(a, foldCase, (int)sizeof(foldCase)); 3162 return utf_convert(a, foldCase, (int)sizeof(foldCase));
3164 } 3163 }
3165 3164
3166 static convertStruct toLower[] = 3165 static convertStruct toLower[] =
3535 * simple case folding. 3534 * simple case folding.
3536 */ 3535 */
3537 int 3536 int
3538 utf_toupper(int a) 3537 utf_toupper(int a)
3539 { 3538 {
3540 /* If 'casemap' contains "keepascii" use ASCII style toupper(). */ 3539 // If 'casemap' contains "keepascii" use ASCII style toupper().
3541 if (a < 128 && (cmp_flags & CMP_KEEPASCII)) 3540 if (a < 128 && (cmp_flags & CMP_KEEPASCII))
3542 return TOUPPER_ASC(a); 3541 return TOUPPER_ASC(a);
3543 3542
3544 #if defined(HAVE_TOWUPPER) && defined(__STDC_ISO_10646__) 3543 #if defined(HAVE_TOWUPPER) && defined(__STDC_ISO_10646__)
3545 /* If towupper() is available and handles Unicode, use it. */ 3544 // If towupper() is available and handles Unicode, use it.
3546 if (!(cmp_flags & CMP_INTERNAL)) 3545 if (!(cmp_flags & CMP_INTERNAL))
3547 return towupper(a); 3546 return towupper(a);
3548 #endif 3547 #endif
3549 3548
3550 /* For characters below 128 use locale sensitive toupper(). */ 3549 // For characters below 128 use locale sensitive toupper().
3551 if (a < 128) 3550 if (a < 128)
3552 return TOUPPER_LOC(a); 3551 return TOUPPER_LOC(a);
3553 3552
3554 /* For any other characters use the above mapping table. */ 3553 // For any other characters use the above mapping table.
3555 return utf_convert(a, toUpper, (int)sizeof(toUpper)); 3554 return utf_convert(a, toUpper, (int)sizeof(toUpper));
3556 } 3555 }
3557 3556
3558 int 3557 int
3559 utf_islower(int a) 3558 utf_islower(int a)
3560 { 3559 {
3561 /* German sharp s is lower case but has no upper case equivalent. */ 3560 // German sharp s is lower case but has no upper case equivalent.
3562 return (utf_toupper(a) != a) || a == 0xdf; 3561 return (utf_toupper(a) != a) || a == 0xdf;
3563 } 3562 }
3564 3563
3565 /* 3564 /*
3566 * Return the lower-case equivalent of "a", which is a UCS-4 character. Use 3565 * Return the lower-case equivalent of "a", which is a UCS-4 character. Use
3567 * simple case folding. 3566 * simple case folding.
3568 */ 3567 */
3569 int 3568 int
3570 utf_tolower(int a) 3569 utf_tolower(int a)
3571 { 3570 {
3572 /* If 'casemap' contains "keepascii" use ASCII style tolower(). */ 3571 // If 'casemap' contains "keepascii" use ASCII style tolower().
3573 if (a < 128 && (cmp_flags & CMP_KEEPASCII)) 3572 if (a < 128 && (cmp_flags & CMP_KEEPASCII))
3574 return TOLOWER_ASC(a); 3573 return TOLOWER_ASC(a);
3575 3574
3576 #if defined(HAVE_TOWLOWER) && defined(__STDC_ISO_10646__) 3575 #if defined(HAVE_TOWLOWER) && defined(__STDC_ISO_10646__)
3577 /* If towlower() is available and handles Unicode, use it. */ 3576 // If towlower() is available and handles Unicode, use it.
3578 if (!(cmp_flags & CMP_INTERNAL)) 3577 if (!(cmp_flags & CMP_INTERNAL))
3579 return towlower(a); 3578 return towlower(a);
3580 #endif 3579 #endif
3581 3580
3582 /* For characters below 128 use locale sensitive tolower(). */ 3581 // For characters below 128 use locale sensitive tolower().
3583 if (a < 128) 3582 if (a < 128)
3584 return TOLOWER_LOC(a); 3583 return TOLOWER_LOC(a);
3585 3584
3586 /* For any other characters use the above mapping table. */ 3585 // For any other characters use the above mapping table.
3587 return utf_convert(a, toLower, (int)sizeof(toLower)); 3586 return utf_convert(a, toLower, (int)sizeof(toLower));
3588 } 3587 }
3589 3588
3590 int 3589 int
3591 utf_isupper(int a) 3590 utf_isupper(int a)
3617 cdiff = utf_fold(c1) - utf_fold(c2); 3616 cdiff = utf_fold(c1) - utf_fold(c2);
3618 if (cdiff != 0) 3617 if (cdiff != 0)
3619 return cdiff; 3618 return cdiff;
3620 } 3619 }
3621 3620
3622 /* some string ended or has an incomplete/illegal character sequence */ 3621 // some string ended or has an incomplete/illegal character sequence
3623 3622
3624 if (c1 == 0 || c2 == 0) 3623 if (c1 == 0 || c2 == 0)
3625 { 3624 {
3626 /* some string ended. shorter string is smaller */ 3625 // some string ended. shorter string is smaller
3627 if (c1 == 0 && c2 == 0) 3626 if (c1 == 0 && c2 == 0)
3628 return 0; 3627 return 0;
3629 return c1 == 0 ? -1 : 1; 3628 return c1 == 0 ? -1 : 1;
3630 } 3629 }
3631 3630
3632 /* Continue with bytewise comparison to produce some result that 3631 // Continue with bytewise comparison to produce some result that
3633 * would make comparison operations involving this function transitive. 3632 // would make comparison operations involving this function transitive.
3634 * 3633 //
3635 * If only one string had an error, comparison should be made with 3634 // If only one string had an error, comparison should be made with
3636 * folded version of the other string. In this case it is enough 3635 // folded version of the other string. In this case it is enough
3637 * to fold just one character to determine the result of comparison. */ 3636 // to fold just one character to determine the result of comparison.
3638 3637
3639 if (c1 != -1 && c2 == -1) 3638 if (c1 != -1 && c2 == -1)
3640 { 3639 {
3641 n1 = utf_char2bytes(utf_fold(c1), buffer); 3640 n1 = utf_char2bytes(utf_fold(c1), buffer);
3642 s1 = buffer; 3641 s1 = buffer;
3690 } 3689 }
3691 else 3690 else
3692 { 3691 {
3693 for (i = 0; i < n; i += l) 3692 for (i = 0; i < n; i += l)
3694 { 3693 {
3695 if (s1[i] == NUL && s2[i] == NUL) /* both strings end */ 3694 if (s1[i] == NUL && s2[i] == NUL) // both strings end
3696 return 0; 3695 return 0;
3697 3696
3698 l = (*mb_ptr2len)(s1 + i); 3697 l = (*mb_ptr2len)(s1 + i);
3699 if (l <= 1) 3698 if (l <= 1)
3700 { 3699 {
3701 /* Single byte: first check normally, then with ignore case. */ 3700 // Single byte: first check normally, then with ignore case.
3702 if (s1[i] != s2[i]) 3701 if (s1[i] != s2[i])
3703 { 3702 {
3704 cdiff = MB_TOLOWER(s1[i]) - MB_TOLOWER(s2[i]); 3703 cdiff = MB_TOLOWER(s1[i]) - MB_TOLOWER(s2[i]);
3705 if (cdiff != 0) 3704 if (cdiff != 0)
3706 return cdiff; 3705 return cdiff;
3707 } 3706 }
3708 } 3707 }
3709 else 3708 else
3710 { 3709 {
3711 /* For non-Unicode multi-byte don't ignore case. */ 3710 // For non-Unicode multi-byte don't ignore case.
3712 if (l > n - i) 3711 if (l > n - i)
3713 l = n - i; 3712 l = n - i;
3714 cdiff = STRNCMP(s1 + i, s2 + i, l); 3713 cdiff = STRNCMP(s1 + i, s2 + i, l);
3715 if (cdiff != 0) 3714 if (cdiff != 0)
3716 return cdiff; 3715 return cdiff;
3731 int rlen = 0; 3730 int rlen = 0;
3732 char_u *line; 3731 char_u *line;
3733 int clen; 3732 int clen;
3734 int i; 3733 int i;
3735 3734
3736 /* Get the byte length of the char under the cursor, including composing 3735 // Get the byte length of the char under the cursor, including composing
3737 * characters. */ 3736 // characters.
3738 line = ml_get_cursor(); 3737 line = ml_get_cursor();
3739 len = utfc_ptr2len(line); 3738 len = utfc_ptr2len(line);
3740 if (len == 0) 3739 if (len == 0)
3741 { 3740 {
3742 msg("NUL"); 3741 msg("NUL");
3746 clen = 0; 3745 clen = 0;
3747 for (i = 0; i < len; ++i) 3746 for (i = 0; i < len; ++i)
3748 { 3747 {
3749 if (clen == 0) 3748 if (clen == 0)
3750 { 3749 {
3751 /* start of (composing) character, get its length */ 3750 // start of (composing) character, get its length
3752 if (i > 0) 3751 if (i > 0)
3753 { 3752 {
3754 STRCPY(IObuff + rlen, "+ "); 3753 STRCPY(IObuff + rlen, "+ ");
3755 rlen += 2; 3754 rlen += 2;
3756 } 3755 }
3757 clen = utf_ptr2len(line + i); 3756 clen = utf_ptr2len(line + i);
3758 } 3757 }
3759 sprintf((char *)IObuff + rlen, "%02x ", 3758 sprintf((char *)IObuff + rlen, "%02x ",
3760 (line[i] == NL) ? NUL : line[i]); /* NUL is stored as NL */ 3759 (line[i] == NL) ? NUL : line[i]); // NUL is stored as NL
3761 --clen; 3760 --clen;
3762 rlen += (int)STRLEN(IObuff + rlen); 3761 rlen += (int)STRLEN(IObuff + rlen);
3763 if (rlen > IOSIZE - 20) 3762 if (rlen > IOSIZE - 20)
3764 break; 3763 break;
3765 } 3764 }
3782 static int 3781 static int
3783 dbcs_head_off(char_u *base, char_u *p) 3782 dbcs_head_off(char_u *base, char_u *p)
3784 { 3783 {
3785 char_u *q; 3784 char_u *q;
3786 3785
3787 /* It can't be a trailing byte when not using DBCS, at the start of the 3786 // It can't be a trailing byte when not using DBCS, at the start of the
3788 * string or the previous byte can't start a double-byte. */ 3787 // string or the previous byte can't start a double-byte.
3789 if (p <= base || MB_BYTE2LEN(p[-1]) == 1 || *p == NUL) 3788 if (p <= base || MB_BYTE2LEN(p[-1]) == 1 || *p == NUL)
3790 return 0; 3789 return 0;
3791 3790
3792 /* This is slow: need to start at the base and go forward until the 3791 // This is slow: need to start at the base and go forward until the
3793 * byte we are looking for. Return 1 when we went past it, 0 otherwise. */ 3792 // byte we are looking for. Return 1 when we went past it, 0 otherwise.
3794 q = base; 3793 q = base;
3795 while (q < p) 3794 while (q < p)
3796 q += dbcs_ptr2len(q); 3795 q += dbcs_ptr2len(q);
3797 return (q == p) ? 0 : 1; 3796 return (q == p) ? 0 : 1;
3798 } 3797 }
3804 int 3803 int
3805 dbcs_screen_head_off(char_u *base, char_u *p) 3804 dbcs_screen_head_off(char_u *base, char_u *p)
3806 { 3805 {
3807 char_u *q; 3806 char_u *q;
3808 3807
3809 /* It can't be a trailing byte when not using DBCS, at the start of the 3808 // It can't be a trailing byte when not using DBCS, at the start of the
3810 * string or the previous byte can't start a double-byte. 3809 // string or the previous byte can't start a double-byte.
3811 * For euc-jp an 0x8e byte in the previous cell always means we have a 3810 // For euc-jp an 0x8e byte in the previous cell always means we have a
3812 * lead byte in the current cell. */ 3811 // lead byte in the current cell.
3813 if (p <= base 3812 if (p <= base
3814 || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e) 3813 || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e)
3815 || MB_BYTE2LEN(p[-1]) == 1 3814 || MB_BYTE2LEN(p[-1]) == 1
3816 || *p == NUL) 3815 || *p == NUL)
3817 return 0; 3816 return 0;
3818 3817
3819 /* This is slow: need to start at the base and go forward until the 3818 // This is slow: need to start at the base and go forward until the
3820 * byte we are looking for. Return 1 when we went past it, 0 otherwise. 3819 // byte we are looking for. Return 1 when we went past it, 0 otherwise.
3821 * For DBCS_JPNU look out for 0x8e, which means the second byte is not 3820 // For DBCS_JPNU look out for 0x8e, which means the second byte is not
3822 * stored as the next byte. */ 3821 // stored as the next byte.
3823 q = base; 3822 q = base;
3824 while (q < p) 3823 while (q < p)
3825 { 3824 {
3826 if (enc_dbcs == DBCS_JPNU && *q == 0x8e) 3825 if (enc_dbcs == DBCS_JPNU && *q == 0x8e)
3827 ++q; 3826 ++q;
3840 int len; 3839 int len;
3841 #ifdef FEAT_ARABIC 3840 #ifdef FEAT_ARABIC
3842 char_u *j; 3841 char_u *j;
3843 #endif 3842 #endif
3844 3843
3845 if (*p < 0x80) /* be quick for ASCII */ 3844 if (*p < 0x80) // be quick for ASCII
3846 return 0; 3845 return 0;
3847 3846
3848 /* Skip backwards over trailing bytes: 10xx.xxxx 3847 // Skip backwards over trailing bytes: 10xx.xxxx
3849 * Skip backwards again if on a composing char. */ 3848 // Skip backwards again if on a composing char.
3850 for (q = p; ; --q) 3849 for (q = p; ; --q)
3851 { 3850 {
3852 /* Move s to the last byte of this char. */ 3851 // Move s to the last byte of this char.
3853 for (s = q; (s[1] & 0xc0) == 0x80; ++s) 3852 for (s = q; (s[1] & 0xc0) == 0x80; ++s)
3854 ; 3853 ;
3855 /* Move q to the first byte of this char. */ 3854 // Move q to the first byte of this char.
3856 while (q > base && (*q & 0xc0) == 0x80) 3855 while (q > base && (*q & 0xc0) == 0x80)
3857 --q; 3856 --q;
3858 /* Check for illegal sequence. Do allow an illegal byte after where we 3857 // Check for illegal sequence. Do allow an illegal byte after where we
3859 * started. */ 3858 // started.
3860 len = utf8len_tab[*q]; 3859 len = utf8len_tab[*q];
3861 if (len != (int)(s - q + 1) && len != (int)(p - q + 1)) 3860 if (len != (int)(s - q + 1) && len != (int)(p - q + 1))
3862 return 0; 3861 return 0;
3863 3862
3864 if (q <= base) 3863 if (q <= base)
3869 continue; 3868 continue;
3870 3869
3871 #ifdef FEAT_ARABIC 3870 #ifdef FEAT_ARABIC
3872 if (arabic_maycombine(c)) 3871 if (arabic_maycombine(c))
3873 { 3872 {
3874 /* Advance to get a sneak-peak at the next char */ 3873 // Advance to get a sneak-peak at the next char
3875 j = q; 3874 j = q;
3876 --j; 3875 --j;
3877 /* Move j to the first byte of this char. */ 3876 // Move j to the first byte of this char.
3878 while (j > base && (*j & 0xc0) == 0x80) 3877 while (j > base && (*j & 0xc0) == 0x80)
3879 --j; 3878 --j;
3880 if (arabic_combine(utf_ptr2char(j), c)) 3879 if (arabic_combine(utf_ptr2char(j), c))
3881 continue; 3880 continue;
3882 } 3881 }
3911 int i; 3910 int i;
3912 int j; 3911 int j;
3913 3912
3914 if (enc_utf8) 3913 if (enc_utf8)
3915 { 3914 {
3916 if (*p < 0x80) /* be quick for ASCII */ 3915 if (*p < 0x80) // be quick for ASCII
3917 return 0; 3916 return 0;
3918 3917
3919 /* Find the next character that isn't 10xx.xxxx */ 3918 // Find the next character that isn't 10xx.xxxx
3920 for (i = 0; (p[i] & 0xc0) == 0x80; ++i) 3919 for (i = 0; (p[i] & 0xc0) == 0x80; ++i)
3921 ; 3920 ;
3922 if (i > 0) 3921 if (i > 0)
3923 { 3922 {
3924 /* Check for illegal sequence. */ 3923 // Check for illegal sequence.
3925 for (j = 0; p - j > base; ++j) 3924 for (j = 0; p - j > base; ++j)
3926 if ((p[-j] & 0xc0) != 0x80) 3925 if ((p[-j] & 0xc0) != 0x80)
3927 break; 3926 break;
3928 if (utf8len_tab[p[-j]] != i + j) 3927 if (utf8len_tab[p[-j]] != i + j)
3929 return 0; 3928 return 0;
3930 } 3929 }
3931 return i; 3930 return i;
3932 } 3931 }
3933 3932
3934 /* Only need to check if we're on a trail byte, it doesn't matter if we 3933 // Only need to check if we're on a trail byte, it doesn't matter if we
3935 * want the offset to the next or current character. */ 3934 // want the offset to the next or current character.
3936 return (*mb_head_off)(base, p); 3935 return (*mb_head_off)(base, p);
3937 } 3936 }
3938 3937
3939 /* 3938 /*
3940 * Return the offset from "p" to the last byte of the character it points 3939 * Return the offset from "p" to the last byte of the character it points
3949 if (*p == NUL) 3948 if (*p == NUL)
3950 return 0; 3949 return 0;
3951 3950
3952 if (enc_utf8) 3951 if (enc_utf8)
3953 { 3952 {
3954 /* Find the last character that is 10xx.xxxx */ 3953 // Find the last character that is 10xx.xxxx
3955 for (i = 0; (p[i + 1] & 0xc0) == 0x80; ++i) 3954 for (i = 0; (p[i + 1] & 0xc0) == 0x80; ++i)
3956 ; 3955 ;
3957 /* Check for illegal sequence. */ 3956 // Check for illegal sequence.
3958 for (j = 0; p - j > base; ++j) 3957 for (j = 0; p - j > base; ++j)
3959 if ((p[-j] & 0xc0) != 0x80) 3958 if ((p[-j] & 0xc0) != 0x80)
3960 break; 3959 break;
3961 if (utf8len_tab[p[-j]] != i + j + 1) 3960 if (utf8len_tab[p[-j]] != i + j + 1)
3962 return 0; 3961 return 0;
3963 return i; 3962 return i;
3964 } 3963 }
3965 3964
3966 /* It can't be the first byte if a double-byte when not using DBCS, at the 3965 // It can't be the first byte if a double-byte when not using DBCS, at the
3967 * end of the string or the byte can't start a double-byte. */ 3966 // end of the string or the byte can't start a double-byte.
3968 if (enc_dbcs == 0 || p[1] == NUL || MB_BYTE2LEN(*p) == 1) 3967 if (enc_dbcs == 0 || p[1] == NUL || MB_BYTE2LEN(*p) == 1)
3969 return 0; 3968 return 0;
3970 3969
3971 /* Return 1 when on the lead byte, 0 when on the tail byte. */ 3970 // Return 1 when on the lead byte, 0 when on the tail byte.
3972 return 1 - dbcs_head_off(base, p); 3971 return 1 - dbcs_head_off(base, p);
3973 } 3972 }
3974 3973
3975 /* 3974 /*
3976 * Find the next illegal byte sequence. 3975 * Find the next illegal byte sequence.
3985 char_u *tofree = NULL; 3984 char_u *tofree = NULL;
3986 3985
3987 vimconv.vc_type = CONV_NONE; 3986 vimconv.vc_type = CONV_NONE;
3988 if (enc_utf8 && (enc_canon_props(curbuf->b_p_fenc) & ENC_8BIT)) 3987 if (enc_utf8 && (enc_canon_props(curbuf->b_p_fenc) & ENC_8BIT))
3989 { 3988 {
3990 /* 'encoding' is "utf-8" but we are editing a 8-bit encoded file, 3989 // 'encoding' is "utf-8" but we are editing a 8-bit encoded file,
3991 * possibly a utf-8 file with illegal bytes. Setup for conversion 3990 // possibly a utf-8 file with illegal bytes. Setup for conversion
3992 * from utf-8 to 'fileencoding'. */ 3991 // from utf-8 to 'fileencoding'.
3993 convert_setup(&vimconv, p_enc, curbuf->b_p_fenc); 3992 convert_setup(&vimconv, p_enc, curbuf->b_p_fenc);
3994 } 3993 }
3995 3994
3996 curwin->w_cursor.coladd = 0; 3995 curwin->w_cursor.coladd = 0;
3997 for (;;) 3996 for (;;)
4006 p = tofree; 4005 p = tofree;
4007 } 4006 }
4008 4007
4009 while (*p != NUL) 4008 while (*p != NUL)
4010 { 4009 {
4011 /* Illegal means that there are not enough trail bytes (checked by 4010 // Illegal means that there are not enough trail bytes (checked by
4012 * utf_ptr2len()) or too many of them (overlong sequence). */ 4011 // utf_ptr2len()) or too many of them (overlong sequence).
4013 len = utf_ptr2len(p); 4012 len = utf_ptr2len(p);
4014 if (*p >= 0x80 && (len == 1 4013 if (*p >= 0x80 && (len == 1
4015 || utf_char2len(utf_ptr2char(p)) != len)) 4014 || utf_char2len(utf_ptr2char(p)) != len))
4016 { 4015 {
4017 if (vimconv.vc_type == CONV_NONE) 4016 if (vimconv.vc_type == CONV_NONE)
4035 break; 4034 break;
4036 ++curwin->w_cursor.lnum; 4035 ++curwin->w_cursor.lnum;
4037 curwin->w_cursor.col = 0; 4036 curwin->w_cursor.col = 0;
4038 } 4037 }
4039 4038
4040 /* didn't find it: don't move and beep */ 4039 // didn't find it: don't move and beep
4041 curwin->w_cursor = pos; 4040 curwin->w_cursor = pos;
4042 beep_flush(); 4041 beep_flush();
4043 4042
4044 theend: 4043 theend:
4045 vim_free(tofree); 4044 vim_free(tofree);
4060 4059
4061 while (end == NULL ? *p != NUL : p < end) 4060 while (end == NULL ? *p != NUL : p < end)
4062 { 4061 {
4063 l = utf8len_tab_zero[*p]; 4062 l = utf8len_tab_zero[*p];
4064 if (l == 0) 4063 if (l == 0)
4065 return FALSE; /* invalid lead byte */ 4064 return FALSE; // invalid lead byte
4066 if (end != NULL && p + l > end) 4065 if (end != NULL && p + l > end)
4067 return FALSE; /* incomplete byte sequence */ 4066 return FALSE; // incomplete byte sequence
4068 ++p; 4067 ++p;
4069 while (--l > 0) 4068 while (--l > 0)
4070 if ((*p++ & 0xc0) != 0x80) 4069 if ((*p++ & 0xc0) != 0x80)
4071 return FALSE; /* invalid trail byte */ 4070 return FALSE; // invalid trail byte
4072 } 4071 }
4073 return TRUE; 4072 return TRUE;
4074 } 4073 }
4075 #endif 4074 #endif
4076 4075
4079 * Special version of mb_tail_off() for use in ScreenLines[]. 4078 * Special version of mb_tail_off() for use in ScreenLines[].
4080 */ 4079 */
4081 int 4080 int
4082 dbcs_screen_tail_off(char_u *base, char_u *p) 4081 dbcs_screen_tail_off(char_u *base, char_u *p)
4083 { 4082 {
4084 /* It can't be the first byte if a double-byte when not using DBCS, at the 4083 // It can't be the first byte if a double-byte when not using DBCS, at the
4085 * end of the string or the byte can't start a double-byte. 4084 // end of the string or the byte can't start a double-byte.
4086 * For euc-jp an 0x8e byte always means we have a lead byte in the current 4085 // For euc-jp an 0x8e byte always means we have a lead byte in the current
4087 * cell. */ 4086 // cell.
4088 if (*p == NUL || p[1] == NUL 4087 if (*p == NUL || p[1] == NUL
4089 || (enc_dbcs == DBCS_JPNU && *p == 0x8e) 4088 || (enc_dbcs == DBCS_JPNU && *p == 0x8e)
4090 || MB_BYTE2LEN(*p) == 1) 4089 || MB_BYTE2LEN(*p) == 1)
4091 return 0; 4090 return 0;
4092 4091
4093 /* Return 1 when on the lead byte, 0 when on the tail byte. */ 4092 // Return 1 when on the lead byte, 0 when on the tail byte.
4094 return 1 - dbcs_screen_head_off(base, p); 4093 return 1 - dbcs_screen_head_off(base, p);
4095 } 4094 }
4096 #endif 4095 #endif
4097 4096
4098 /* 4097 /*
4120 p = ml_get_buf(buf, lp->lnum, FALSE); 4119 p = ml_get_buf(buf, lp->lnum, FALSE);
4121 if (*p == NUL || (int)STRLEN(p) < lp->col) 4120 if (*p == NUL || (int)STRLEN(p) < lp->col)
4122 lp->col = 0; 4121 lp->col = 0;
4123 else 4122 else
4124 lp->col -= (*mb_head_off)(p, p + lp->col); 4123 lp->col -= (*mb_head_off)(p, p + lp->col);
4125 /* Reset "coladd" when the cursor would be on the right half of a 4124 // Reset "coladd" when the cursor would be on the right half of a
4126 * double-wide character. */ 4125 // double-wide character.
4127 if (lp->coladd == 1 4126 if (lp->coladd == 1
4128 && p[lp->col] != TAB 4127 && p[lp->col] != TAB
4129 && vim_isprintc((*mb_ptr2char)(p + lp->col)) 4128 && vim_isprintc((*mb_ptr2char)(p + lp->col))
4130 && ptr2cells(p + lp->col) > 1) 4129 && ptr2cells(p + lp->col) > 1)
4131 lp->coladd = 0; 4130 lp->coladd = 0;
4135 /* 4134 /*
4136 * Return a pointer to the character before "*p", if there is one. 4135 * Return a pointer to the character before "*p", if there is one.
4137 */ 4136 */
4138 char_u * 4137 char_u *
4139 mb_prevptr( 4138 mb_prevptr(
4140 char_u *line, /* start of the string */ 4139 char_u *line, // start of the string
4141 char_u *p) 4140 char_u *p)
4142 { 4141 {
4143 if (p > line) 4142 if (p > line)
4144 MB_PTR_BACK(line, p); 4143 MB_PTR_BACK(line, p);
4145 return p; 4144 return p;
4194 static char_u buf[6]; 4193 static char_u buf[6];
4195 int n; 4194 int n;
4196 int m = 0; 4195 int m = 0;
4197 char_u *str = *pp; 4196 char_u *str = *pp;
4198 4197
4199 /* Must translate K_SPECIAL KS_SPECIAL KE_FILLER to K_SPECIAL and CSI 4198 // Must translate K_SPECIAL KS_SPECIAL KE_FILLER to K_SPECIAL and CSI
4200 * KS_EXTRA KE_CSI to CSI. 4199 // KS_EXTRA KE_CSI to CSI.
4201 * Maximum length of a utf-8 character is 4 bytes. */ 4200 // Maximum length of a utf-8 character is 4 bytes.
4202 for (n = 0; str[n] != NUL && m < 4; ++n) 4201 for (n = 0; str[n] != NUL && m < 4; ++n)
4203 { 4202 {
4204 if (str[n] == K_SPECIAL 4203 if (str[n] == K_SPECIAL
4205 && str[n + 1] == KS_SPECIAL 4204 && str[n + 1] == KS_SPECIAL
4206 && str[n + 2] == KE_FILLER) 4205 && str[n + 2] == KE_FILLER)
4222 else if (str[n] == K_SPECIAL 4221 else if (str[n] == K_SPECIAL
4223 # ifdef FEAT_GUI 4222 # ifdef FEAT_GUI
4224 || str[n] == CSI 4223 || str[n] == CSI
4225 # endif 4224 # endif
4226 ) 4225 )
4227 break; /* a special key can't be a multibyte char */ 4226 break; // a special key can't be a multibyte char
4228 else 4227 else
4229 buf[m++] = str[n]; 4228 buf[m++] = str[n];
4230 buf[m] = NUL; 4229 buf[m] = NUL;
4231 4230
4232 /* Return a multi-byte character if it's found. An illegal sequence 4231 // Return a multi-byte character if it's found. An illegal sequence
4233 * will result in a 1 here. */ 4232 // will result in a 1 here.
4234 if ((*mb_ptr2len)(buf) > 1) 4233 if ((*mb_ptr2len)(buf) > 1)
4235 { 4234 {
4236 *pp = str + n + 1; 4235 *pp = str + n + 1;
4237 return buf; 4236 return buf;
4238 } 4237 }
4239 4238
4240 /* Bail out quickly for ASCII. */ 4239 // Bail out quickly for ASCII.
4241 if (buf[0] < 128) 4240 if (buf[0] < 128)
4242 break; 4241 break;
4243 } 4242 }
4244 return NULL; 4243 return NULL;
4245 } 4244 }
4307 char_u *p, *s; 4306 char_u *p, *s;
4308 int i; 4307 int i;
4309 4308
4310 if (STRCMP(enc, "default") == 0) 4309 if (STRCMP(enc, "default") == 0)
4311 { 4310 {
4312 /* Use the default encoding as it's found by set_init_1(). */ 4311 // Use the default encoding as it's found by set_init_1().
4313 r = get_encoding_default(); 4312 r = get_encoding_default();
4314 if (r == NULL) 4313 if (r == NULL)
4315 r = (char_u *)"latin1"; 4314 r = (char_u *)"latin1";
4316 return vim_strsave(r); 4315 return vim_strsave(r);
4317 } 4316 }
4318 4317
4319 /* copy "enc" to allocated memory, with room for two '-' */ 4318 // copy "enc" to allocated memory, with room for two '-'
4320 r = alloc(STRLEN(enc) + 3); 4319 r = alloc(STRLEN(enc) + 3);
4321 if (r != NULL) 4320 if (r != NULL)
4322 { 4321 {
4323 /* Make it all lower case and replace '_' with '-'. */ 4322 // Make it all lower case and replace '_' with '-'.
4324 p = r; 4323 p = r;
4325 for (s = enc; *s != NUL; ++s) 4324 for (s = enc; *s != NUL; ++s)
4326 { 4325 {
4327 if (*s == '_') 4326 if (*s == '_')
4328 *p++ = '-'; 4327 *p++ = '-';
4329 else 4328 else
4330 *p++ = TOLOWER_ASC(*s); 4329 *p++ = TOLOWER_ASC(*s);
4331 } 4330 }
4332 *p = NUL; 4331 *p = NUL;
4333 4332
4334 /* Skip "2byte-" and "8bit-". */ 4333 // Skip "2byte-" and "8bit-".
4335 p = enc_skip(r); 4334 p = enc_skip(r);
4336 4335
4337 /* Change "microsoft-cp" to "cp". Used in some spell files. */ 4336 // Change "microsoft-cp" to "cp". Used in some spell files.
4338 if (STRNCMP(p, "microsoft-cp", 12) == 0) 4337 if (STRNCMP(p, "microsoft-cp", 12) == 0)
4339 STRMOVE(p, p + 10); 4338 STRMOVE(p, p + 10);
4340 4339
4341 /* "iso8859" -> "iso-8859" */ 4340 // "iso8859" -> "iso-8859"
4342 if (STRNCMP(p, "iso8859", 7) == 0) 4341 if (STRNCMP(p, "iso8859", 7) == 0)
4343 { 4342 {
4344 STRMOVE(p + 4, p + 3); 4343 STRMOVE(p + 4, p + 3);
4345 p[3] = '-'; 4344 p[3] = '-';
4346 } 4345 }
4347 4346
4348 /* "iso-8859n" -> "iso-8859-n" */ 4347 // "iso-8859n" -> "iso-8859-n"
4349 if (STRNCMP(p, "iso-8859", 8) == 0 && p[8] != '-') 4348 if (STRNCMP(p, "iso-8859", 8) == 0 && p[8] != '-')
4350 { 4349 {
4351 STRMOVE(p + 9, p + 8); 4350 STRMOVE(p + 9, p + 8);
4352 p[8] = '-'; 4351 p[8] = '-';
4353 } 4352 }
4354 4353
4355 /* "latin-N" -> "latinN" */ 4354 // "latin-N" -> "latinN"
4356 if (STRNCMP(p, "latin-", 6) == 0) 4355 if (STRNCMP(p, "latin-", 6) == 0)
4357 STRMOVE(p + 5, p + 6); 4356 STRMOVE(p + 5, p + 6);
4358 4357
4359 if (enc_canon_search(p) >= 0) 4358 if (enc_canon_search(p) >= 0)
4360 { 4359 {
4361 /* canonical name can be used unmodified */ 4360 // canonical name can be used unmodified
4362 if (p != r) 4361 if (p != r)
4363 STRMOVE(r, p); 4362 STRMOVE(r, p);
4364 } 4363 }
4365 else if ((i = enc_alias_search(p)) >= 0) 4364 else if ((i = enc_alias_search(p)) >= 0)
4366 { 4365 {
4367 /* alias recognized, get canonical name */ 4366 // alias recognized, get canonical name
4368 vim_free(r); 4367 vim_free(r);
4369 r = vim_strsave((char_u *)enc_canon_table[i].name); 4368 r = vim_strsave((char_u *)enc_canon_table[i].name);
4370 } 4369 }
4371 } 4370 }
4372 return r; 4371 return r;
4412 s = getenv("LANG"); 4411 s = getenv("LANG");
4413 4412
4414 if (s == NULL || *s == NUL) 4413 if (s == NULL || *s == NUL)
4415 return NULL; 4414 return NULL;
4416 4415
4417 /* The most generic locale format is: 4416 // The most generic locale format is:
4418 * language[_territory][.codeset][@modifier][+special][,[sponsor][_revision]] 4417 // language[_territory][.codeset][@modifier][+special][,[sponsor][_revision]]
4419 * If there is a '.' remove the part before it. 4418 // If there is a '.' remove the part before it.
4420 * if there is something after the codeset, remove it. 4419 // if there is something after the codeset, remove it.
4421 * Make the name lowercase and replace '_' with '-'. 4420 // Make the name lowercase and replace '_' with '-'.
4422 * Exception: "ja_JP.EUC" == "euc-jp", "zh_CN.EUC" = "euc-cn", 4421 // Exception: "ja_JP.EUC" == "euc-jp", "zh_CN.EUC" = "euc-cn",
4423 * "ko_KR.EUC" == "euc-kr" 4422 // "ko_KR.EUC" == "euc-kr"
4424 */
4425 if ((p = (char *)vim_strchr((char_u *)s, '.')) != NULL) 4423 if ((p = (char *)vim_strchr((char_u *)s, '.')) != NULL)
4426 { 4424 {
4427 if (p > s + 2 && STRNICMP(p + 1, "EUC", 3) == 0 4425 if (p > s + 2 && STRNICMP(p + 1, "EUC", 3) == 0
4428 && !isalnum((int)p[4]) && p[4] != '-' && p[-3] == '_') 4426 && !isalnum((int)p[4]) && p[4] != '-' && p[-3] == '_')
4429 { 4427 {
4430 /* copy "XY.EUC" to "euc-XY" to buf[10] */ 4428 // copy "XY.EUC" to "euc-XY" to buf[10]
4431 STRCPY(buf + 10, "euc-"); 4429 STRCPY(buf + 10, "euc-");
4432 buf[14] = p[-2]; 4430 buf[14] = p[-2];
4433 buf[15] = p[-1]; 4431 buf[15] = p[-1];
4434 buf[16] = 0; 4432 buf[16] = 0;
4435 s = buf + 10; 4433 s = buf + 10;
4534 char *p; 4532 char *p;
4535 size_t tolen; 4533 size_t tolen;
4536 static int iconv_ok = -1; 4534 static int iconv_ok = -1;
4537 4535
4538 if (iconv_ok == FALSE) 4536 if (iconv_ok == FALSE)
4539 return (void *)-1; /* detected a broken iconv() previously */ 4537 return (void *)-1; // detected a broken iconv() previously
4540 4538
4541 #ifdef DYNAMIC_ICONV 4539 #ifdef DYNAMIC_ICONV
4542 /* Check if the iconv.dll can be found. */ 4540 // Check if the iconv.dll can be found.
4543 if (!iconv_enabled(TRUE)) 4541 if (!iconv_enabled(TRUE))
4544 return (void *)-1; 4542 return (void *)-1;
4545 #endif 4543 #endif
4546 4544
4547 fd = iconv_open((char *)enc_skip(to), (char *)enc_skip(from)); 4545 fd = iconv_open((char *)enc_skip(to), (char *)enc_skip(from));
4600 fromlen = slen; 4598 fromlen = slen;
4601 for (;;) 4599 for (;;)
4602 { 4600 {
4603 if (len == 0 || ICONV_ERRNO == ICONV_E2BIG) 4601 if (len == 0 || ICONV_ERRNO == ICONV_E2BIG)
4604 { 4602 {
4605 /* Allocate enough room for most conversions. When re-allocating 4603 // Allocate enough room for most conversions. When re-allocating
4606 * increase the buffer size. */ 4604 // increase the buffer size.
4607 len = len + fromlen * 2 + 40; 4605 len = len + fromlen * 2 + 40;
4608 p = alloc(len); 4606 p = alloc(len);
4609 if (p != NULL && done > 0) 4607 if (p != NULL && done > 0)
4610 mch_memmove(p, result, done); 4608 mch_memmove(p, result, done);
4611 vim_free(result); 4609 vim_free(result);
4612 result = p; 4610 result = p;
4613 if (result == NULL) /* out of memory */ 4611 if (result == NULL) // out of memory
4614 break; 4612 break;
4615 } 4613 }
4616 4614
4617 to = (char *)result + done; 4615 to = (char *)result + done;
4618 tolen = len - done - 2; 4616 tolen = len - done - 2;
4619 /* Avoid a warning for systems with a wrong iconv() prototype by 4617 // Avoid a warning for systems with a wrong iconv() prototype by
4620 * casting the second argument to void *. */ 4618 // casting the second argument to void *.
4621 if (iconv(vcp->vc_fd, (void *)&from, &fromlen, &to, &tolen) 4619 if (iconv(vcp->vc_fd, (void *)&from, &fromlen, &to, &tolen)
4622 != (size_t)-1) 4620 != (size_t)-1)
4623 { 4621 {
4624 /* Finished, append a NUL. */ 4622 // Finished, append a NUL.
4625 *to = NUL; 4623 *to = NUL;
4626 break; 4624 break;
4627 } 4625 }
4628 4626
4629 /* Check both ICONV_EINVAL and EINVAL, because the dynamically loaded 4627 // Check both ICONV_EINVAL and EINVAL, because the dynamically loaded
4630 * iconv library may use one of them. */ 4628 // iconv library may use one of them.
4631 if (!vcp->vc_fail && unconvlenp != NULL 4629 if (!vcp->vc_fail && unconvlenp != NULL
4632 && (ICONV_ERRNO == ICONV_EINVAL || ICONV_ERRNO == EINVAL)) 4630 && (ICONV_ERRNO == ICONV_EINVAL || ICONV_ERRNO == EINVAL))
4633 { 4631 {
4634 /* Handle an incomplete sequence at the end. */ 4632 // Handle an incomplete sequence at the end.
4635 *to = NUL; 4633 *to = NUL;
4636 *unconvlenp = (int)fromlen; 4634 *unconvlenp = (int)fromlen;
4637 break; 4635 break;
4638 } 4636 }
4639 4637
4640 /* Check both ICONV_EILSEQ and EILSEQ, because the dynamically loaded 4638 // Check both ICONV_EILSEQ and EILSEQ, because the dynamically loaded
4641 * iconv library may use one of them. */ 4639 // iconv library may use one of them.
4642 else if (!vcp->vc_fail 4640 else if (!vcp->vc_fail
4643 && (ICONV_ERRNO == ICONV_EILSEQ || ICONV_ERRNO == EILSEQ 4641 && (ICONV_ERRNO == ICONV_EILSEQ || ICONV_ERRNO == EILSEQ
4644 || ICONV_ERRNO == ICONV_EINVAL || ICONV_ERRNO == EINVAL)) 4642 || ICONV_ERRNO == ICONV_EINVAL || ICONV_ERRNO == EINVAL))
4645 { 4643 {
4646 /* Can't convert: insert a '?' and skip a character. This assumes 4644 // Can't convert: insert a '?' and skip a character. This assumes
4647 * conversion from 'encoding' to something else. In other 4645 // conversion from 'encoding' to something else. In other
4648 * situations we don't know what to skip anyway. */ 4646 // situations we don't know what to skip anyway.
4649 *to++ = '?'; 4647 *to++ = '?';
4650 if ((*mb_ptr2cells)((char_u *)from) > 1) 4648 if ((*mb_ptr2cells)((char_u *)from) > 1)
4651 *to++ = '?'; 4649 *to++ = '?';
4652 if (enc_utf8) 4650 if (enc_utf8)
4653 l = utfc_ptr2len_len((char_u *)from, (int)fromlen); 4651 l = utfc_ptr2len_len((char_u *)from, (int)fromlen);
4660 from += l; 4658 from += l;
4661 fromlen -= l; 4659 fromlen -= l;
4662 } 4660 }
4663 else if (ICONV_ERRNO != ICONV_E2BIG) 4661 else if (ICONV_ERRNO != ICONV_E2BIG)
4664 { 4662 {
4665 /* conversion failed */ 4663 // conversion failed
4666 VIM_CLEAR(result); 4664 VIM_CLEAR(result);
4667 break; 4665 break;
4668 } 4666 }
4669 /* Not enough room or skipping illegal sequence. */ 4667 // Not enough room or skipping illegal sequence.
4670 done = to - (char *)result; 4668 done = to - (char *)result;
4671 } 4669 }
4672 4670
4673 if (resultlenp != NULL && result != NULL) 4671 if (resultlenp != NULL && result != NULL)
4674 *resultlenp = (int)(to - (char *)result); 4672 *resultlenp = (int)(to - (char *)result);
4678 # if defined(DYNAMIC_ICONV) || defined(PROTO) 4676 # if defined(DYNAMIC_ICONV) || defined(PROTO)
4679 /* 4677 /*
4680 * Dynamically load the "iconv.dll" on Win32. 4678 * Dynamically load the "iconv.dll" on Win32.
4681 */ 4679 */
4682 4680
4683 # ifndef DYNAMIC_ICONV /* must be generating prototypes */ 4681 # ifndef DYNAMIC_ICONV // must be generating prototypes
4684 # define HINSTANCE int 4682 # define HINSTANCE int
4685 # endif 4683 # endif
4686 static HINSTANCE hIconvDLL = 0; 4684 static HINSTANCE hIconvDLL = 0;
4687 static HINSTANCE hMsvcrtDLL = 0; 4685 static HINSTANCE hMsvcrtDLL = 0;
4688 4686
4703 iconv_enabled(int verbose) 4701 iconv_enabled(int verbose)
4704 { 4702 {
4705 if (hIconvDLL != 0 && hMsvcrtDLL != 0) 4703 if (hIconvDLL != 0 && hMsvcrtDLL != 0)
4706 return TRUE; 4704 return TRUE;
4707 4705
4708 /* The iconv DLL file goes under different names, try them all. 4706 // The iconv DLL file goes under different names, try them all.
4709 * Do the "2" version first, it's newer. */ 4707 // Do the "2" version first, it's newer.
4710 #ifdef DYNAMIC_ICONV_DLL_ALT2 4708 #ifdef DYNAMIC_ICONV_DLL_ALT2
4711 if (hIconvDLL == 0) 4709 if (hIconvDLL == 0)
4712 hIconvDLL = vimLoadLib(DYNAMIC_ICONV_DLL_ALT2); 4710 hIconvDLL = vimLoadLib(DYNAMIC_ICONV_DLL_ALT2);
4713 #endif 4711 #endif
4714 #ifdef DYNAMIC_ICONV_DLL_ALT3 4712 #ifdef DYNAMIC_ICONV_DLL_ALT3
4724 4722
4725 if (hIconvDLL != 0) 4723 if (hIconvDLL != 0)
4726 hMsvcrtDLL = vimLoadLib(DYNAMIC_MSVCRT_DLL); 4724 hMsvcrtDLL = vimLoadLib(DYNAMIC_MSVCRT_DLL);
4727 if (hIconvDLL == 0 || hMsvcrtDLL == 0) 4725 if (hIconvDLL == 0 || hMsvcrtDLL == 0)
4728 { 4726 {
4729 /* Only give the message when 'verbose' is set, otherwise it might be 4727 // Only give the message when 'verbose' is set, otherwise it might be
4730 * done whenever a conversion is attempted. */ 4728 // done whenever a conversion is attempted.
4731 if (verbose && p_verbose > 0) 4729 if (verbose && p_verbose > 0)
4732 { 4730 {
4733 verbose_enter(); 4731 verbose_enter();
4734 semsg(_(e_loadlib), 4732 semsg(_(e_loadlib),
4735 hIconvDLL == 0 ? DYNAMIC_ICONV_DLL : DYNAMIC_MSVCRT_DLL); 4733 hIconvDLL == 0 ? DYNAMIC_ICONV_DLL : DYNAMIC_MSVCRT_DLL);
4762 } 4760 }
4763 4761
4764 void 4762 void
4765 iconv_end(void) 4763 iconv_end(void)
4766 { 4764 {
4767 /* Don't use iconv() when inputting or outputting characters. */ 4765 // Don't use iconv() when inputting or outputting characters.
4768 if (input_conv.vc_type == CONV_ICONV) 4766 if (input_conv.vc_type == CONV_ICONV)
4769 convert_setup(&input_conv, NULL, NULL); 4767 convert_setup(&input_conv, NULL, NULL);
4770 if (output_conv.vc_type == CONV_ICONV) 4768 if (output_conv.vc_type == CONV_ICONV)
4771 convert_setup(&output_conv, NULL, NULL); 4769 convert_setup(&output_conv, NULL, NULL);
4772 4770
4775 if (hMsvcrtDLL != 0) 4773 if (hMsvcrtDLL != 0)
4776 FreeLibrary(hMsvcrtDLL); 4774 FreeLibrary(hMsvcrtDLL);
4777 hIconvDLL = 0; 4775 hIconvDLL = 0;
4778 hMsvcrtDLL = 0; 4776 hMsvcrtDLL = 0;
4779 } 4777 }
4780 # endif /* DYNAMIC_ICONV */ 4778 # endif // DYNAMIC_ICONV
4781 # endif /* USE_ICONV */ 4779 # endif // USE_ICONV
4782 4780
4783 4781
4784 #ifdef FEAT_GUI 4782 #ifdef FEAT_GUI
4785 # define USE_IMACTIVATEFUNC (!gui.in_use && *p_imaf != NUL) 4783 # define USE_IMACTIVATEFUNC (!gui.in_use && *p_imaf != NUL)
4786 # define USE_IMSTATUSFUNC (!gui.in_use && *p_imsf != NUL) 4784 # define USE_IMSTATUSFUNC (!gui.in_use && *p_imsf != NUL)
4805 static int 4803 static int
4806 call_imstatusfunc(void) 4804 call_imstatusfunc(void)
4807 { 4805 {
4808 int is_active; 4806 int is_active;
4809 4807
4810 /* FIXME: Don't execute user function in unsafe situation. */ 4808 // FIXME: Don't execute user function in unsafe situation.
4811 if (exiting || is_autocmd_blocked()) 4809 if (exiting || is_autocmd_blocked())
4812 return FALSE; 4810 return FALSE;
4813 /* FIXME: :py print 'xxx' is shown duplicate result. 4811 // FIXME: :py print 'xxx' is shown duplicate result.
4814 * Use silent to avoid it. */ 4812 // Use silent to avoid it.
4815 ++msg_silent; 4813 ++msg_silent;
4816 is_active = call_func_retnr(p_imsf, 0, NULL); 4814 is_active = call_func_retnr(p_imsf, 0, NULL);
4817 --msg_silent; 4815 --msg_silent;
4818 return (is_active > 0); 4816 return (is_active > 0);
4819 } 4817 }
4820 #endif 4818 #endif
4821 4819
4822 #if defined(FEAT_XIM) || defined(PROTO) 4820 #if defined(FEAT_XIM) || defined(PROTO)
4823 4821
4824 # if defined(FEAT_GUI_GTK) || defined(PROTO) 4822 # if defined(FEAT_GUI_GTK) || defined(PROTO)
4825 static int xim_has_preediting INIT(= FALSE); /* IM current status */ 4823 static int xim_has_preediting INIT(= FALSE); // IM current status
4826 4824
4827 /* 4825 /*
4828 * Set preedit_start_col to the current cursor position. 4826 * Set preedit_start_col to the current cursor position.
4829 */ 4827 */
4830 static void 4828 static void
4832 { 4830 {
4833 if (State & CMDLINE) 4831 if (State & CMDLINE)
4834 preedit_start_col = cmdline_getvcol_cursor(); 4832 preedit_start_col = cmdline_getvcol_cursor();
4835 else if (curwin != NULL && curwin->w_buffer != NULL) 4833 else if (curwin != NULL && curwin->w_buffer != NULL)
4836 getvcol(curwin, &curwin->w_cursor, &preedit_start_col, NULL, NULL); 4834 getvcol(curwin, &curwin->w_cursor, &preedit_start_col, NULL, NULL);
4837 /* Prevent that preediting marks the buffer as changed. */ 4835 // Prevent that preediting marks the buffer as changed.
4838 xim_changed_while_preediting = curbuf->b_changed; 4836 xim_changed_while_preediting = curbuf->b_changed;
4839 } 4837 }
4840 4838
4841 static int im_is_active = FALSE; /* IM is enabled for current mode */ 4839 static int im_is_active = FALSE; // IM is enabled for current mode
4842 static int preedit_is_active = FALSE; 4840 static int preedit_is_active = FALSE;
4843 static int im_preedit_cursor = 0; /* cursor offset in characters */ 4841 static int im_preedit_cursor = 0; // cursor offset in characters
4844 static int im_preedit_trailing = 0; /* number of characters after cursor */ 4842 static int im_preedit_trailing = 0; // number of characters after cursor
4845 4843
4846 static unsigned long im_commit_handler_id = 0; 4844 static unsigned long im_commit_handler_id = 0;
4847 static unsigned int im_activatekey_keyval = GDK_VoidSymbol; 4845 static unsigned int im_activatekey_keyval = GDK_VoidSymbol;
4848 static unsigned int im_activatekey_state = 0; 4846 static unsigned int im_activatekey_state = 0;
4849 4847
4893 if (p_imst == IM_OVER_THE_SPOT) 4891 if (p_imst == IM_OVER_THE_SPOT)
4894 im_preedit_window_set_position(); 4892 im_preedit_window_set_position();
4895 } 4893 }
4896 } 4894 }
4897 4895
4898 # if 0 || defined(PROTO) /* apparently only used in gui_x11.c */ 4896 # if 0 || defined(PROTO) // apparently only used in gui_x11.c
4899 void 4897 void
4900 xim_set_preedit(void) 4898 xim_set_preedit(void)
4901 { 4899 {
4902 im_set_position(gui.row, gui.col); 4900 im_set_position(gui.row, gui.col);
4903 } 4901 }
4904 # endif 4902 # endif
4905 4903
4906 static void 4904 static void
4907 im_add_to_input(char_u *str, int len) 4905 im_add_to_input(char_u *str, int len)
4908 { 4906 {
4909 /* Convert from 'termencoding' (always "utf-8") to 'encoding' */ 4907 // Convert from 'termencoding' (always "utf-8") to 'encoding'
4910 if (input_conv.vc_type != CONV_NONE) 4908 if (input_conv.vc_type != CONV_NONE)
4911 { 4909 {
4912 str = string_convert(&input_conv, str, &len); 4910 str = string_convert(&input_conv, str, &len);
4913 g_return_if_fail(str != NULL); 4911 g_return_if_fail(str != NULL);
4914 } 4912 }
4916 add_to_input_buf_csi(str, len); 4914 add_to_input_buf_csi(str, len);
4917 4915
4918 if (input_conv.vc_type != CONV_NONE) 4916 if (input_conv.vc_type != CONV_NONE)
4919 vim_free(str); 4917 vim_free(str);
4920 4918
4921 if (p_mh) /* blank out the pointer if necessary */ 4919 if (p_mh) // blank out the pointer if necessary
4922 gui_mch_mousehide(TRUE); 4920 gui_mch_mousehide(TRUE);
4923 } 4921 }
4924 4922
4925 static void 4923 static void
4926 im_preedit_window_set_position(void) 4924 im_preedit_window_set_position(void)
4984 = pango_font_description_get_size(gui.norm_font) / PANGO_SCALE; 4982 = pango_font_description_get_size(gui.norm_font) / PANGO_SCALE;
4985 gchar *fontsize_propval = NULL; 4983 gchar *fontsize_propval = NULL;
4986 4984
4987 if (!pango_font_description_get_size_is_absolute(gui.norm_font)) 4985 if (!pango_font_description_get_size_is_absolute(gui.norm_font))
4988 { 4986 {
4989 /* fontsize was given in points. Convert it into that in pixels 4987 // fontsize was given in points. Convert it into that in pixels
4990 * to use with CSS. */ 4988 // to use with CSS.
4991 GdkScreen * const screen 4989 GdkScreen * const screen
4992 = gdk_window_get_screen(gtk_widget_get_window(gui.mainwin)); 4990 = gdk_window_get_screen(gtk_widget_get_window(gui.mainwin));
4993 const gdouble dpi = gdk_screen_get_resolution(screen); 4991 const gdouble dpi = gdk_screen_get_resolution(screen);
4994 fontsize = dpi * fontsize / 72; 4992 fontsize = dpi * fontsize / 72;
4995 } 4993 }
5076 static void 5074 static void
5077 im_show_preedit() 5075 im_show_preedit()
5078 { 5076 {
5079 im_preedit_window_open(); 5077 im_preedit_window_open();
5080 5078
5081 if (p_mh) /* blank out the pointer if necessary */ 5079 if (p_mh) // blank out the pointer if necessary
5082 gui_mch_mousehide(TRUE); 5080 gui_mch_mousehide(TRUE);
5083 } 5081 }
5084 5082
5085 static void 5083 static void
5086 im_delete_preedit(void) 5084 im_delete_preedit(void)
5170 xim_log("im_commit_cb(): %s\n", str); 5168 xim_log("im_commit_cb(): %s\n", str);
5171 #endif 5169 #endif
5172 5170
5173 if (p_imst == IM_ON_THE_SPOT) 5171 if (p_imst == IM_ON_THE_SPOT)
5174 { 5172 {
5175 /* The imhangul module doesn't reset the preedit string before 5173 // The imhangul module doesn't reset the preedit string before
5176 * committing. Call im_delete_preedit() to work around that. */ 5174 // committing. Call im_delete_preedit() to work around that.
5177 im_delete_preedit(); 5175 im_delete_preedit();
5178 5176
5179 /* Indicate that preediting has finished. */ 5177 // Indicate that preediting has finished.
5180 if (preedit_start_col == MAXCOL) 5178 if (preedit_start_col == MAXCOL)
5181 { 5179 {
5182 init_preedit_start_col(); 5180 init_preedit_start_col();
5183 commit_with_preedit = FALSE; 5181 commit_with_preedit = FALSE;
5184 } 5182 }
5185 5183
5186 /* The thing which setting "preedit_start_col" to MAXCOL means that 5184 // The thing which setting "preedit_start_col" to MAXCOL means that
5187 * "preedit_start_col" will be set forcedly when calling 5185 // "preedit_start_col" will be set forcedly when calling
5188 * preedit_changed_cb() next time. 5186 // preedit_changed_cb() next time.
5189 * "preedit_start_col" should not reset with MAXCOL on this part. Vim 5187 // "preedit_start_col" should not reset with MAXCOL on this part. Vim
5190 * is simulating the preediting by using add_to_input_str(). when 5188 // is simulating the preediting by using add_to_input_str(). when
5191 * preedit begin immediately before committed, the typebuf is not 5189 // preedit begin immediately before committed, the typebuf is not
5192 * flushed to screen, then it can't get correct "preedit_start_col". 5190 // flushed to screen, then it can't get correct "preedit_start_col".
5193 * Thus, it should calculate the cells by adding cells of the committed 5191 // Thus, it should calculate the cells by adding cells of the committed
5194 * string. */ 5192 // string.
5195 if (input_conv.vc_type != CONV_NONE) 5193 if (input_conv.vc_type != CONV_NONE)
5196 { 5194 {
5197 im_str = string_convert(&input_conv, (char_u *)str, &len); 5195 im_str = string_convert(&input_conv, (char_u *)str, &len);
5198 g_return_if_fail(im_str != NULL); 5196 g_return_if_fail(im_str != NULL);
5199 } 5197 }
5205 if (input_conv.vc_type != CONV_NONE) 5203 if (input_conv.vc_type != CONV_NONE)
5206 vim_free(im_str); 5204 vim_free(im_str);
5207 preedit_start_col += clen; 5205 preedit_start_col += clen;
5208 } 5206 }
5209 5207
5210 /* Is this a single character that matches a keypad key that's just 5208 // Is this a single character that matches a keypad key that's just
5211 * been pressed? If so, we don't want it to be entered as such - let 5209 // been pressed? If so, we don't want it to be entered as such - let
5212 * us carry on processing the raw keycode so that it may be used in 5210 // us carry on processing the raw keycode so that it may be used in
5213 * mappings as <kSomething>. */ 5211 // mappings as <kSomething>.
5214 if (xim_expected_char != NUL) 5212 if (xim_expected_char != NUL)
5215 { 5213 {
5216 /* We're currently processing a keypad or other special key */ 5214 // We're currently processing a keypad or other special key
5217 if (slen == 1 && str[0] == xim_expected_char) 5215 if (slen == 1 && str[0] == xim_expected_char)
5218 { 5216 {
5219 /* It's a match - don't do it here */ 5217 // It's a match - don't do it here
5220 xim_ignored_char = TRUE; 5218 xim_ignored_char = TRUE;
5221 add_to_input = FALSE; 5219 add_to_input = FALSE;
5222 } 5220 }
5223 else 5221 else
5224 { 5222 {
5225 /* Not a match */ 5223 // Not a match
5226 xim_ignored_char = FALSE; 5224 xim_ignored_char = FALSE;
5227 } 5225 }
5228 } 5226 }
5229 5227
5230 if (add_to_input) 5228 if (add_to_input)
5231 im_add_to_input((char_u *)str, slen); 5229 im_add_to_input((char_u *)str, slen);
5232 5230
5233 if (p_imst == IM_ON_THE_SPOT) 5231 if (p_imst == IM_ON_THE_SPOT)
5234 { 5232 {
5235 /* Inserting chars while "im_is_active" is set does not cause a 5233 // Inserting chars while "im_is_active" is set does not cause a
5236 * change of buffer. When the chars are committed the buffer must be 5234 // change of buffer. When the chars are committed the buffer must be
5237 * marked as changed. */ 5235 // marked as changed.
5238 if (!commit_with_preedit) 5236 if (!commit_with_preedit)
5239 preedit_start_col = MAXCOL; 5237 preedit_start_col = MAXCOL;
5240 5238
5241 /* This flag is used in changed() at next call. */ 5239 // This flag is used in changed() at next call.
5242 xim_changed_while_preediting = TRUE; 5240 xim_changed_while_preediting = TRUE;
5243 } 5241 }
5244 5242
5245 if (gtk_main_level() > 0) 5243 if (gtk_main_level() > 0)
5246 gtk_main_quit(); 5244 gtk_main_quit();
5271 #ifdef XIM_DEBUG 5269 #ifdef XIM_DEBUG
5272 xim_log("im_preedit_end_cb()\n"); 5270 xim_log("im_preedit_end_cb()\n");
5273 #endif 5271 #endif
5274 im_delete_preedit(); 5272 im_delete_preedit();
5275 5273
5276 /* Indicate that preediting has finished */ 5274 // Indicate that preediting has finished
5277 if (p_imst == IM_ON_THE_SPOT) 5275 if (p_imst == IM_ON_THE_SPOT)
5278 preedit_start_col = MAXCOL; 5276 preedit_start_col = MAXCOL;
5279 xim_has_preediting = FALSE; 5277 xim_has_preediting = FALSE;
5280 5278
5281 #if 0 5279 #if 0
5282 /* Removal of this line suggested by Takuhiro Nishioka. Fixes that IM was 5280 // Removal of this line suggested by Takuhiro Nishioka. Fixes that IM was
5283 * switched off unintentionally. We now use preedit_is_active (added by 5281 // switched off unintentionally. We now use preedit_is_active (added by
5284 * SungHyun Nam). */ 5282 // SungHyun Nam).
5285 im_is_active = FALSE; 5283 im_is_active = FALSE;
5286 #endif 5284 #endif
5287 preedit_is_active = FALSE; 5285 preedit_is_active = FALSE;
5288 gui_update_cursor(TRUE, FALSE); 5286 gui_update_cursor(TRUE, FALSE);
5289 im_show_info(); 5287 im_show_info();
5347 5345
5348 #ifdef XIM_DEBUG 5346 #ifdef XIM_DEBUG
5349 xim_log("im_preedit_changed_cb(): %s\n", preedit_string); 5347 xim_log("im_preedit_changed_cb(): %s\n", preedit_string);
5350 #endif 5348 #endif
5351 5349
5352 g_return_if_fail(preedit_string != NULL); /* just in case */ 5350 g_return_if_fail(preedit_string != NULL); // just in case
5353 5351
5354 if (p_imst == IM_OVER_THE_SPOT) 5352 if (p_imst == IM_OVER_THE_SPOT)
5355 { 5353 {
5356 if (preedit_string[0] == NUL) 5354 if (preedit_string[0] == NUL)
5357 { 5355 {
5364 im_show_preedit(); 5362 im_show_preedit();
5365 } 5363 }
5366 } 5364 }
5367 else 5365 else
5368 { 5366 {
5369 /* If preedit_start_col is MAXCOL set it to the current cursor position. */ 5367 // If preedit_start_col is MAXCOL set it to the current cursor position.
5370 if (preedit_start_col == MAXCOL && preedit_string[0] != '\0') 5368 if (preedit_start_col == MAXCOL && preedit_string[0] != '\0')
5371 { 5369 {
5372 xim_has_preediting = TRUE; 5370 xim_has_preediting = TRUE;
5373 5371
5374 /* Urgh, this breaks if the input buffer isn't empty now */ 5372 // Urgh, this breaks if the input buffer isn't empty now
5375 init_preedit_start_col(); 5373 init_preedit_start_col();
5376 } 5374 }
5377 else if (cursor_index == 0 && preedit_string[0] == '\0') 5375 else if (cursor_index == 0 && preedit_string[0] == '\0')
5378 { 5376 {
5379 xim_has_preediting = FALSE; 5377 xim_has_preediting = FALSE;
5380 5378
5381 /* If at the start position (after typing backspace) 5379 // If at the start position (after typing backspace)
5382 * preedit_start_col must be reset. */ 5380 // preedit_start_col must be reset.
5383 preedit_start_col = MAXCOL; 5381 preedit_start_col = MAXCOL;
5384 } 5382 }
5385 5383
5386 im_delete_preedit(); 5384 im_delete_preedit();
5387 5385
5412 else 5410 else
5413 ++im_preedit_trailing; 5411 ++im_preedit_trailing;
5414 } 5412 }
5415 if (!is_composing && i >= cursor_index) 5413 if (!is_composing && i >= cursor_index)
5416 { 5414 {
5417 /* This is essentially the same as im_preedit_trailing, except 5415 // This is essentially the same as im_preedit_trailing, except
5418 * composing characters are not counted even if p_deco is set. */ 5416 // composing characters are not counted even if p_deco is set.
5419 ++num_move_back; 5417 ++num_move_back;
5420 } 5418 }
5421 if (preedit_start_col != MAXCOL) 5419 if (preedit_start_col != MAXCOL)
5422 preedit_end_col += utf_ptr2cells(p); 5420 preedit_end_col += utf_ptr2cells(p);
5423 } 5421 }
5464 attr = pango_attr_iterator_get(iter, PANGO_ATTR_BACKGROUND); 5462 attr = pango_attr_iterator_get(iter, PANGO_ATTR_BACKGROUND);
5465 if (attr != NULL) 5463 if (attr != NULL)
5466 { 5464 {
5467 const PangoColor *color = &((PangoAttrColor *)attr)->color; 5465 const PangoColor *color = &((PangoAttrColor *)attr)->color;
5468 5466
5469 /* Assume inverse if black background is requested */ 5467 // Assume inverse if black background is requested
5470 if ((color->red | color->green | color->blue) == 0) 5468 if ((color->red | color->green | color->blue) == 0)
5471 char_attr |= HL_INVERSE; 5469 char_attr |= HL_INVERSE;
5472 } 5470 }
5473 5471
5474 return char_attr; 5472 return char_attr;
5492 5490
5493 if (preedit_string != NULL && attr_list != NULL) 5491 if (preedit_string != NULL && attr_list != NULL)
5494 { 5492 {
5495 int idx; 5493 int idx;
5496 5494
5497 /* Get the byte index as used by PangoAttrIterator */ 5495 // Get the byte index as used by PangoAttrIterator
5498 for (idx = 0; col > 0 && preedit_string[idx] != '\0'; --col) 5496 for (idx = 0; col > 0 && preedit_string[idx] != '\0'; --col)
5499 idx += utfc_ptr2len((char_u *)preedit_string + idx); 5497 idx += utfc_ptr2len((char_u *)preedit_string + idx);
5500 5498
5501 if (preedit_string[idx] != '\0') 5499 if (preedit_string[idx] != '\0')
5502 { 5500 {
5504 int start, end; 5502 int start, end;
5505 5503
5506 char_attr = HL_NORMAL; 5504 char_attr = HL_NORMAL;
5507 iter = pango_attr_list_get_iterator(attr_list); 5505 iter = pango_attr_list_get_iterator(attr_list);
5508 5506
5509 /* Extract all relevant attributes from the list. */ 5507 // Extract all relevant attributes from the list.
5510 do 5508 do
5511 { 5509 {
5512 pango_attr_iterator_range(iter, &start, &end); 5510 pango_attr_iterator_range(iter, &start, &end);
5513 5511
5514 if (idx >= start && idx < end) 5512 if (idx >= start && idx < end)
5586 unsigned tmp_state = 0; 5584 unsigned tmp_state = 0;
5587 5585
5588 mods_end = strrchr(str, '-'); 5586 mods_end = strrchr(str, '-');
5589 mods_end = (mods_end != NULL) ? mods_end + 1 : str; 5587 mods_end = (mods_end != NULL) ? mods_end + 1 : str;
5590 5588
5591 /* Parse modifier keys */ 5589 // Parse modifier keys
5592 while (str < mods_end) 5590 while (str < mods_end)
5593 switch (*str++) 5591 switch (*str++)
5594 { 5592 {
5595 case '-': break; 5593 case '-': break;
5596 case 'S': case 's': tmp_state |= (unsigned)GDK_SHIFT_MASK; break; 5594 case 'S': case 's': tmp_state |= (unsigned)GDK_SHIFT_MASK; break;
5647 { 5645 {
5648 GdkEventKey *event; 5646 GdkEventKey *event;
5649 5647
5650 event = (GdkEventKey *)gdk_event_new(GDK_KEY_PRESS); 5648 event = (GdkEventKey *)gdk_event_new(GDK_KEY_PRESS);
5651 g_object_ref(gtk_widget_get_window(gui.drawarea)); 5649 g_object_ref(gtk_widget_get_window(gui.drawarea));
5652 /* unreffed by gdk_event_free() */ 5650 // unreffed by gdk_event_free()
5653 event->window = gtk_widget_get_window(gui.drawarea); 5651 event->window = gtk_widget_get_window(gui.drawarea);
5654 event->send_event = TRUE; 5652 event->send_event = TRUE;
5655 event->time = GDK_CURRENT_TIME; 5653 event->time = GDK_CURRENT_TIME;
5656 event->state = state; 5654 event->state = state;
5657 event->keyval = keyval; 5655 event->keyval = keyval;
5658 event->hardware_keycode = /* needed for XIM */ 5656 event->hardware_keycode = // needed for XIM
5659 XKeysymToKeycode(GDK_WINDOW_XDISPLAY(event->window), (KeySym)keyval); 5657 XKeysymToKeycode(GDK_WINDOW_XDISPLAY(event->window), (KeySym)keyval);
5660 event->length = 0; 5658 event->length = 0;
5661 event->string = NULL; 5659 event->string = NULL;
5662 5660
5663 gtk_im_context_filter_keypress(xic, event); 5661 gtk_im_context_filter_keypress(xic, event);
5664 5662
5665 /* For consistency, also send the corresponding release event. */ 5663 // For consistency, also send the corresponding release event.
5666 event->type = GDK_KEY_RELEASE; 5664 event->type = GDK_KEY_RELEASE;
5667 event->send_event = FALSE; 5665 event->send_event = FALSE;
5668 gtk_im_context_filter_keypress(xic, event); 5666 gtk_im_context_filter_keypress(xic, event);
5669 5667
5670 gdk_event_free((GdkEvent *)event); 5668 gdk_event_free((GdkEvent *)event);
5768 if (event->keyval == im_activatekey_keyval 5766 if (event->keyval == im_activatekey_keyval
5769 && (event->state & im_activatekey_state) == im_activatekey_state) 5767 && (event->state & im_activatekey_state) == im_activatekey_state)
5770 { 5768 {
5771 unsigned int state_mask; 5769 unsigned int state_mask;
5772 5770
5773 /* Require the state of the 3 most used modifiers to match exactly. 5771 // Require the state of the 3 most used modifiers to match exactly.
5774 * Otherwise e.g. <S-C-space> would be unusable for other purposes 5772 // Otherwise e.g. <S-C-space> would be unusable for other purposes
5775 * if the IM activate key is <S-space>. */ 5773 // if the IM activate key is <S-space>.
5776 state_mask = im_activatekey_state; 5774 state_mask = im_activatekey_state;
5777 state_mask |= ((int)GDK_SHIFT_MASK | (int)GDK_CONTROL_MASK 5775 state_mask |= ((int)GDK_SHIFT_MASK | (int)GDK_CONTROL_MASK
5778 | (int)GDK_MOD1_MASK); 5776 | (int)GDK_MOD1_MASK);
5779 5777
5780 if ((event->state & state_mask) != im_activatekey_state) 5778 if ((event->state & state_mask) != im_activatekey_state)
5781 return FALSE; 5779 return FALSE;
5782 5780
5783 /* Don't send it a second time on GDK_KEY_RELEASE. */ 5781 // Don't send it a second time on GDK_KEY_RELEASE.
5784 if (event->type != GDK_KEY_PRESS) 5782 if (event->type != GDK_KEY_PRESS)
5785 return TRUE; 5783 return TRUE;
5786 5784
5787 if (map_to_exists_mode((char_u *)"", LANGMAP, FALSE)) 5785 if (map_to_exists_mode((char_u *)"", LANGMAP, FALSE))
5788 { 5786 {
5789 im_set_active(FALSE); 5787 im_set_active(FALSE);
5790 5788
5791 /* ":lmap" mappings exists, toggle use of mappings. */ 5789 // ":lmap" mappings exists, toggle use of mappings.
5792 State ^= LANGMAP; 5790 State ^= LANGMAP;
5793 if (State & LANGMAP) 5791 if (State & LANGMAP)
5794 { 5792 {
5795 curbuf->b_p_iminsert = B_IMODE_NONE; 5793 curbuf->b_p_iminsert = B_IMODE_NONE;
5796 State &= ~LANGMAP; 5794 State &= ~LANGMAP;
5804 } 5802 }
5805 5803
5806 return gtk_im_context_filter_keypress(xic, event); 5804 return gtk_im_context_filter_keypress(xic, event);
5807 } 5805 }
5808 5806
5809 /* Don't filter events through the IM context if IM isn't active 5807 // Don't filter events through the IM context if IM isn't active
5810 * right now. Unlike with GTK+ 1.2 we cannot rely on the IM module 5808 // right now. Unlike with GTK+ 1.2 we cannot rely on the IM module
5811 * not doing anything before the activation key was sent. */ 5809 // not doing anything before the activation key was sent.
5812 if (im_activatekey_keyval == GDK_VoidSymbol || im_is_active) 5810 if (im_activatekey_keyval == GDK_VoidSymbol || im_is_active)
5813 { 5811 {
5814 int imresult = gtk_im_context_filter_keypress(xic, event); 5812 int imresult = gtk_im_context_filter_keypress(xic, event);
5815 5813
5816 if (p_imst == IM_ON_THE_SPOT) 5814 if (p_imst == IM_ON_THE_SPOT)
5817 { 5815 {
5818 /* Some XIM send following sequence: 5816 // Some XIM send following sequence:
5819 * 1. preedited string. 5817 // 1. preedited string.
5820 * 2. committed string. 5818 // 2. committed string.
5821 * 3. line changed key. 5819 // 3. line changed key.
5822 * 4. preedited string. 5820 // 4. preedited string.
5823 * 5. remove preedited string. 5821 // 5. remove preedited string.
5824 * if 3, Vim can't move back the above line for 5. 5822 // if 3, Vim can't move back the above line for 5.
5825 * thus, this part should not parse the key. */ 5823 // thus, this part should not parse the key.
5826 if (!imresult && preedit_start_col != MAXCOL 5824 if (!imresult && preedit_start_col != MAXCOL
5827 && event->keyval == GDK_Return) 5825 && event->keyval == GDK_Return)
5828 { 5826 {
5829 im_synthesize_keypress(GDK_Return, 0U); 5827 im_synthesize_keypress(GDK_Return, 0U);
5830 return FALSE; 5828 return FALSE;
5831 } 5829 }
5832 } 5830 }
5833 5831
5834 /* If XIM tried to commit a keypad key as a single char., 5832 // If XIM tried to commit a keypad key as a single char.,
5835 * ignore it so we can use the keypad key 'raw', for mappings. */ 5833 // ignore it so we can use the keypad key 'raw', for mappings.
5836 if (xim_expected_char != NUL && xim_ignored_char) 5834 if (xim_expected_char != NUL && xim_ignored_char)
5837 /* We had a keypad key, and XIM tried to thieve it */ 5835 // We had a keypad key, and XIM tried to thieve it
5838 return FALSE; 5836 return FALSE;
5839 5837
5840 /* This is supposed to fix a problem with iBus, that space 5838 // This is supposed to fix a problem with iBus, that space
5841 * characters don't work in input mode. */ 5839 // characters don't work in input mode.
5842 xim_expected_char = NUL; 5840 xim_expected_char = NUL;
5843 5841
5844 /* Normal processing */ 5842 // Normal processing
5845 return imresult; 5843 return imresult;
5846 } 5844 }
5847 } 5845 }
5848 5846
5849 return FALSE; 5847 return FALSE;
5869 im_is_preediting(void) 5867 im_is_preediting(void)
5870 { 5868 {
5871 return xim_has_preediting; 5869 return xim_has_preediting;
5872 } 5870 }
5873 5871
5874 # else /* !FEAT_GUI_GTK */ 5872 # else // !FEAT_GUI_GTK
5875 5873
5876 static int xim_is_active = FALSE; /* XIM should be active in the current 5874 static int xim_is_active = FALSE; // XIM should be active in the current
5877 mode */ 5875 // mode
5878 static int xim_has_focus = FALSE; /* XIM is really being used for Vim */ 5876 static int xim_has_focus = FALSE; // XIM is really being used for Vim
5879 # ifdef FEAT_GUI_X11 5877 # ifdef FEAT_GUI_X11
5880 static XIMStyle input_style; 5878 static XIMStyle input_style;
5881 static int status_area_enabled = TRUE; 5879 static int status_area_enabled = TRUE;
5882 # endif 5880 # endif
5883 5881
5888 void 5886 void
5889 im_set_active(int active_arg) 5887 im_set_active(int active_arg)
5890 { 5888 {
5891 int active = active_arg; 5889 int active = active_arg;
5892 5890
5893 /* If 'imdisable' is set, XIM is never active. */ 5891 // If 'imdisable' is set, XIM is never active.
5894 if (p_imdisable) 5892 if (p_imdisable)
5895 active = FALSE; 5893 active = FALSE;
5896 else if (input_style & XIMPreeditPosition) 5894 else if (input_style & XIMPreeditPosition)
5897 /* There is a problem in switching XIM off when preediting is used, 5895 // There is a problem in switching XIM off when preediting is used,
5898 * and it is not clear how this can be solved. For now, keep XIM on 5896 // and it is not clear how this can be solved. For now, keep XIM on
5899 * all the time, like it was done in Vim 5.8. */ 5897 // all the time, like it was done in Vim 5.8.
5900 active = TRUE; 5898 active = TRUE;
5901 5899
5902 # if defined(FEAT_EVAL) 5900 # if defined(FEAT_EVAL)
5903 if (USE_IMACTIVATEFUNC) 5901 if (USE_IMACTIVATEFUNC)
5904 { 5902 {
5912 # endif 5910 # endif
5913 5911
5914 if (xic == NULL) 5912 if (xic == NULL)
5915 return; 5913 return;
5916 5914
5917 /* Remember the active state, it is needed when Vim gets keyboard focus. */ 5915 // Remember the active state, it is needed when Vim gets keyboard focus.
5918 xim_is_active = active; 5916 xim_is_active = active;
5919 xim_set_preedit(); 5917 xim_set_preedit();
5920 } 5918 }
5921 5919
5922 /* 5920 /*
5973 5971
5974 xim_set_focus(TRUE); 5972 xim_set_focus(TRUE);
5975 5973
5976 if (!xim_has_focus) 5974 if (!xim_has_focus)
5977 { 5975 {
5978 /* hide XIM cursor */ 5976 // hide XIM cursor
5979 over_spot.x = 0; 5977 over_spot.x = 0;
5980 over_spot.y = -100; /* arbitrary invisible position */ 5978 over_spot.y = -100; // arbitrary invisible position
5981 attr_list = (XVaNestedList) XVaCreateNestedList(0, 5979 attr_list = (XVaNestedList) XVaCreateNestedList(0,
5982 XNSpotLocation, 5980 XNSpotLocation,
5983 &over_spot, 5981 &over_spot,
5984 NULL); 5982 NULL);
5985 XSetICValues(xic, XNPreeditAttributes, attr_list, NULL); 5983 XSetICValues(xic, XNPreeditAttributes, attr_list, NULL);
6153 } 6151 }
6154 6152
6155 if (xim == NULL && (p = XSetLocaleModifiers("")) != NULL && *p != NUL) 6153 if (xim == NULL && (p = XSetLocaleModifiers("")) != NULL && *p != NUL)
6156 xim = XOpenIM(x11_display, NULL, NULL, NULL); 6154 xim = XOpenIM(x11_display, NULL, NULL, NULL);
6157 6155
6158 /* This is supposed to be useful to obtain characters through 6156 // This is supposed to be useful to obtain characters through
6159 * XmbLookupString() without really using a XIM. */ 6157 // XmbLookupString() without really using a XIM.
6160 if (xim == NULL && (p = XSetLocaleModifiers("@im=none")) != NULL 6158 if (xim == NULL && (p = XSetLocaleModifiers("@im=none")) != NULL
6161 && *p != NUL) 6159 && *p != NUL)
6162 xim = XOpenIM(x11_display, NULL, NULL, NULL); 6160 xim = XOpenIM(x11_display, NULL, NULL, NULL);
6163 6161
6164 if (xim == NULL) 6162 if (xim == NULL)
6165 { 6163 {
6166 /* Only give this message when verbose is set, because too many people 6164 // Only give this message when verbose is set, because too many people
6167 * got this message when they didn't want to use a XIM. */ 6165 // got this message when they didn't want to use a XIM.
6168 if (p_verbose > 0) 6166 if (p_verbose > 0)
6169 { 6167 {
6170 verbose_enter(); 6168 verbose_enter();
6171 emsg(_("E286: Failed to open input method")); 6169 emsg(_("E286: Failed to open input method"));
6172 verbose_leave(); 6170 verbose_leave();
6239 } 6237 }
6240 XFree(xim_styles); 6238 XFree(xim_styles);
6241 6239
6242 if (!found) 6240 if (!found)
6243 { 6241 {
6244 /* Only give this message when verbose is set, because too many people 6242 // Only give this message when verbose is set, because too many people
6245 * got this message when they didn't want to use a XIM. */ 6243 // got this message when they didn't want to use a XIM.
6246 if (p_verbose > 0) 6244 if (p_verbose > 0)
6247 { 6245 {
6248 verbose_enter(); 6246 verbose_enter();
6249 emsg(_("E289: input method doesn't support my preedit type")); 6247 emsg(_("E289: input method doesn't support my preedit type"));
6250 verbose_leave(); 6248 verbose_leave();
6255 6253
6256 over_spot.x = TEXT_X(gui.col); 6254 over_spot.x = TEXT_X(gui.col);
6257 over_spot.y = TEXT_Y(gui.row); 6255 over_spot.y = TEXT_Y(gui.row);
6258 input_style = this_input_style; 6256 input_style = this_input_style;
6259 6257
6260 /* A crash was reported when trying to pass gui.norm_font as XNFontSet, 6258 // A crash was reported when trying to pass gui.norm_font as XNFontSet,
6261 * thus that has been removed. Hopefully the default works... */ 6259 // thus that has been removed. Hopefully the default works...
6262 # ifdef FEAT_XFONTSET 6260 # ifdef FEAT_XFONTSET
6263 if (gui.fontset != NOFONTSET) 6261 if (gui.fontset != NOFONTSET)
6264 { 6262 {
6265 preedit_list = XVaCreateNestedList(0, 6263 preedit_list = XVaCreateNestedList(0,
6266 XNSpotLocation, &over_spot, 6264 XNSpotLocation, &over_spot,
6316 } 6314 }
6317 6315
6318 return TRUE; 6316 return TRUE;
6319 } 6317 }
6320 6318
6321 # endif /* FEAT_GUI_X11 */ 6319 # endif // FEAT_GUI_X11
6322 6320
6323 /* 6321 /*
6324 * Get IM status. When IM is on, return TRUE. Else return FALSE. 6322 * Get IM status. When IM is on, return TRUE. Else return FALSE.
6325 * FIXME: This doesn't work correctly: Having focus doesn't always mean XIM is 6323 * FIXME: This doesn't work correctly: Having focus doesn't always mean XIM is
6326 * active, when not having focus XIM may still be active (e.g., when using a 6324 * active, when not having focus XIM may still be active (e.g., when using a
6334 return call_imstatusfunc(); 6332 return call_imstatusfunc();
6335 # endif 6333 # endif
6336 return xim_has_focus; 6334 return xim_has_focus;
6337 } 6335 }
6338 6336
6339 # endif /* !FEAT_GUI_GTK */ 6337 # endif // !FEAT_GUI_GTK
6340 6338
6341 # if !defined(FEAT_GUI_GTK) || defined(PROTO) 6339 # if !defined(FEAT_GUI_GTK) || defined(PROTO)
6342 /* 6340 /*
6343 * Set up the status area. 6341 * Set up the status area.
6344 * 6342 *
6360 { 6358 {
6361 if (input_style & XIMPreeditArea) 6359 if (input_style & XIMPreeditArea)
6362 { 6360 {
6363 XRectangle *needed_rect; 6361 XRectangle *needed_rect;
6364 6362
6365 /* to get status_area width */ 6363 // to get status_area width
6366 status_list = XVaCreateNestedList(0, XNAreaNeeded, 6364 status_list = XVaCreateNestedList(0, XNAreaNeeded,
6367 &needed_rect, NULL); 6365 &needed_rect, NULL);
6368 XGetICValues(xic, XNStatusAttributes, status_list, NULL); 6366 XGetICValues(xic, XNStatusAttributes, status_list, NULL);
6369 XFree(status_list); 6367 XFree(status_list);
6370 6368
6396 #endif 6394 #endif
6397 status_area.width = 0; 6395 status_area.width = 0;
6398 status_area.height = gui.char_height; 6396 status_area.height = gui.char_height;
6399 } 6397 }
6400 6398
6401 if (input_style & XIMPreeditArea) /* off-the-spot */ 6399 if (input_style & XIMPreeditArea) // off-the-spot
6402 { 6400 {
6403 pre_area.x = status_area.x + status_area.width; 6401 pre_area.x = status_area.x + status_area.width;
6404 pre_area.y = gui.char_height * Rows + gui.border_offset; 6402 pre_area.y = gui.char_height * Rows + gui.border_offset;
6405 pre_area.width = gui.char_width * Columns - pre_area.x; 6403 pre_area.width = gui.char_width * Columns - pre_area.x;
6406 if (gui.which_scrollbars[SBAR_BOTTOM]) 6404 if (gui.which_scrollbars[SBAR_BOTTOM])
6410 pre_area.y += gui.menu_height; 6408 pre_area.y += gui.menu_height;
6411 #endif 6409 #endif
6412 pre_area.height = gui.char_height; 6410 pre_area.height = gui.char_height;
6413 preedit_list = XVaCreateNestedList(0, XNArea, &pre_area, NULL); 6411 preedit_list = XVaCreateNestedList(0, XNArea, &pre_area, NULL);
6414 } 6412 }
6415 else if (input_style & XIMPreeditPosition) /* over-the-spot */ 6413 else if (input_style & XIMPreeditPosition) // over-the-spot
6416 { 6414 {
6417 pre_area.x = 0; 6415 pre_area.x = 0;
6418 pre_area.y = 0; 6416 pre_area.y = 0;
6419 pre_area.height = gui.char_height * Rows; 6417 pre_area.height = gui.char_height * Rows;
6420 pre_area.width = gui.char_width * Columns; 6418 pre_area.width = gui.char_width * Columns;
6451 return gui.char_height; 6449 return gui.char_height;
6452 return 0; 6450 return 0;
6453 } 6451 }
6454 # endif 6452 # endif
6455 6453
6456 #else /* !defined(FEAT_XIM) */ 6454 #else // !defined(FEAT_XIM)
6457 6455
6458 # if defined(IME_WITHOUT_XIM) || defined(VIMDLL) 6456 # if defined(IME_WITHOUT_XIM) || defined(VIMDLL)
6459 static int im_was_set_active = FALSE; 6457 static int im_was_set_active = FALSE;
6460 6458
6461 int 6459 int
6496 { 6494 {
6497 } 6495 }
6498 # endif 6496 # endif
6499 # endif 6497 # endif
6500 6498
6501 #endif /* FEAT_XIM */ 6499 #endif // FEAT_XIM
6502 6500
6503 #if defined(FEAT_EVAL) || defined(PROTO) 6501 #if defined(FEAT_EVAL) || defined(PROTO)
6504 /* 6502 /*
6505 * "getimstatus()" function 6503 * "getimstatus()" function
6506 */ 6504 */
6543 int from_prop; 6541 int from_prop;
6544 int to_prop; 6542 int to_prop;
6545 int from_is_utf8; 6543 int from_is_utf8;
6546 int to_is_utf8; 6544 int to_is_utf8;
6547 6545
6548 /* Reset to no conversion. */ 6546 // Reset to no conversion.
6549 #ifdef USE_ICONV 6547 #ifdef USE_ICONV
6550 if (vcp->vc_type == CONV_ICONV && vcp->vc_fd != (iconv_t)-1) 6548 if (vcp->vc_type == CONV_ICONV && vcp->vc_fd != (iconv_t)-1)
6551 iconv_close(vcp->vc_fd); 6549 iconv_close(vcp->vc_fd);
6552 #endif 6550 #endif
6553 vcp->vc_type = CONV_NONE; 6551 vcp->vc_type = CONV_NONE;
6554 vcp->vc_factor = 1; 6552 vcp->vc_factor = 1;
6555 vcp->vc_fail = FALSE; 6553 vcp->vc_fail = FALSE;
6556 6554
6557 /* No conversion when one of the names is empty or they are equal. */ 6555 // No conversion when one of the names is empty or they are equal.
6558 if (from == NULL || *from == NUL || to == NULL || *to == NUL 6556 if (from == NULL || *from == NUL || to == NULL || *to == NUL
6559 || STRCMP(from, to) == 0) 6557 || STRCMP(from, to) == 0)
6560 return OK; 6558 return OK;
6561 6559
6562 from_prop = enc_canon_props(from); 6560 from_prop = enc_canon_props(from);
6570 else 6568 else
6571 to_is_utf8 = to_prop == ENC_UNICODE; 6569 to_is_utf8 = to_prop == ENC_UNICODE;
6572 6570
6573 if ((from_prop & ENC_LATIN1) && to_is_utf8) 6571 if ((from_prop & ENC_LATIN1) && to_is_utf8)
6574 { 6572 {
6575 /* Internal latin1 -> utf-8 conversion. */ 6573 // Internal latin1 -> utf-8 conversion.
6576 vcp->vc_type = CONV_TO_UTF8; 6574 vcp->vc_type = CONV_TO_UTF8;
6577 vcp->vc_factor = 2; /* up to twice as long */ 6575 vcp->vc_factor = 2; // up to twice as long
6578 } 6576 }
6579 else if ((from_prop & ENC_LATIN9) && to_is_utf8) 6577 else if ((from_prop & ENC_LATIN9) && to_is_utf8)
6580 { 6578 {
6581 /* Internal latin9 -> utf-8 conversion. */ 6579 // Internal latin9 -> utf-8 conversion.
6582 vcp->vc_type = CONV_9_TO_UTF8; 6580 vcp->vc_type = CONV_9_TO_UTF8;
6583 vcp->vc_factor = 3; /* up to three as long (euro sign) */ 6581 vcp->vc_factor = 3; // up to three as long (euro sign)
6584 } 6582 }
6585 else if (from_is_utf8 && (to_prop & ENC_LATIN1)) 6583 else if (from_is_utf8 && (to_prop & ENC_LATIN1))
6586 { 6584 {
6587 /* Internal utf-8 -> latin1 conversion. */ 6585 // Internal utf-8 -> latin1 conversion.
6588 vcp->vc_type = CONV_TO_LATIN1; 6586 vcp->vc_type = CONV_TO_LATIN1;
6589 } 6587 }
6590 else if (from_is_utf8 && (to_prop & ENC_LATIN9)) 6588 else if (from_is_utf8 && (to_prop & ENC_LATIN9))
6591 { 6589 {
6592 /* Internal utf-8 -> latin9 conversion. */ 6590 // Internal utf-8 -> latin9 conversion.
6593 vcp->vc_type = CONV_TO_LATIN9; 6591 vcp->vc_type = CONV_TO_LATIN9;
6594 } 6592 }
6595 #ifdef MSWIN 6593 #ifdef MSWIN
6596 /* Win32-specific codepage <-> codepage conversion without iconv. */ 6594 // Win32-specific codepage <-> codepage conversion without iconv.
6597 else if ((from_is_utf8 || encname2codepage(from) > 0) 6595 else if ((from_is_utf8 || encname2codepage(from) > 0)
6598 && (to_is_utf8 || encname2codepage(to) > 0)) 6596 && (to_is_utf8 || encname2codepage(to) > 0))
6599 { 6597 {
6600 vcp->vc_type = CONV_CODEPAGE; 6598 vcp->vc_type = CONV_CODEPAGE;
6601 vcp->vc_factor = 2; /* up to twice as long */ 6599 vcp->vc_factor = 2; // up to twice as long
6602 vcp->vc_cpfrom = from_is_utf8 ? 0 : encname2codepage(from); 6600 vcp->vc_cpfrom = from_is_utf8 ? 0 : encname2codepage(from);
6603 vcp->vc_cpto = to_is_utf8 ? 0 : encname2codepage(to); 6601 vcp->vc_cpto = to_is_utf8 ? 0 : encname2codepage(to);
6604 } 6602 }
6605 #endif 6603 #endif
6606 #ifdef MACOS_CONVERT 6604 #ifdef MACOS_CONVERT
6609 vcp->vc_type = CONV_MAC_LATIN1; 6607 vcp->vc_type = CONV_MAC_LATIN1;
6610 } 6608 }
6611 else if ((from_prop & ENC_MACROMAN) && to_is_utf8) 6609 else if ((from_prop & ENC_MACROMAN) && to_is_utf8)
6612 { 6610 {
6613 vcp->vc_type = CONV_MAC_UTF8; 6611 vcp->vc_type = CONV_MAC_UTF8;
6614 vcp->vc_factor = 2; /* up to twice as long */ 6612 vcp->vc_factor = 2; // up to twice as long
6615 } 6613 }
6616 else if ((from_prop & ENC_LATIN1) && (to_prop & ENC_MACROMAN)) 6614 else if ((from_prop & ENC_LATIN1) && (to_prop & ENC_MACROMAN))
6617 { 6615 {
6618 vcp->vc_type = CONV_LATIN1_MAC; 6616 vcp->vc_type = CONV_LATIN1_MAC;
6619 } 6617 }
6623 } 6621 }
6624 #endif 6622 #endif
6625 #ifdef USE_ICONV 6623 #ifdef USE_ICONV
6626 else 6624 else
6627 { 6625 {
6628 /* Use iconv() for conversion. */ 6626 // Use iconv() for conversion.
6629 vcp->vc_fd = (iconv_t)my_iconv_open( 6627 vcp->vc_fd = (iconv_t)my_iconv_open(
6630 to_is_utf8 ? (char_u *)"utf-8" : to, 6628 to_is_utf8 ? (char_u *)"utf-8" : to,
6631 from_is_utf8 ? (char_u *)"utf-8" : from); 6629 from_is_utf8 ? (char_u *)"utf-8" : from);
6632 if (vcp->vc_fd != (iconv_t)-1) 6630 if (vcp->vc_fd != (iconv_t)-1)
6633 { 6631 {
6634 vcp->vc_type = CONV_ICONV; 6632 vcp->vc_type = CONV_ICONV;
6635 vcp->vc_factor = 4; /* could be longer too... */ 6633 vcp->vc_factor = 4; // could be longer too...
6636 } 6634 }
6637 } 6635 }
6638 #endif 6636 #endif
6639 if (vcp->vc_type == CONV_NONE) 6637 if (vcp->vc_type == CONV_NONE)
6640 return FAIL; 6638 return FAIL;
6679 { 6677 {
6680 if (dlen <= maxlen) 6678 if (dlen <= maxlen)
6681 { 6679 {
6682 if (unconvertlen > 0) 6680 if (unconvertlen > 0)
6683 { 6681 {
6684 /* Move the unconverted characters to allocated memory. */ 6682 // Move the unconverted characters to allocated memory.
6685 *restp = alloc(unconvertlen); 6683 *restp = alloc(unconvertlen);
6686 if (*restp != NULL) 6684 if (*restp != NULL)
6687 mch_memmove(*restp, ptr + len - unconvertlen, unconvertlen); 6685 mch_memmove(*restp, ptr + len - unconvertlen, unconvertlen);
6688 *restlenp = unconvertlen; 6686 *restlenp = unconvertlen;
6689 } 6687 }
6690 mch_memmove(ptr, d, dlen); 6688 mch_memmove(ptr, d, dlen);
6691 } 6689 }
6692 else 6690 else
6693 /* result is too long, keep the unconverted text (the caller must 6691 // result is too long, keep the unconverted text (the caller must
6694 * have done something wrong!) */ 6692 // have done something wrong!)
6695 dlen = len; 6693 dlen = len;
6696 vim_free(d); 6694 vim_free(d);
6697 } 6695 }
6698 return dlen; 6696 return dlen;
6699 } 6697 }
6740 if (len == 0) 6738 if (len == 0)
6741 return vim_strsave((char_u *)""); 6739 return vim_strsave((char_u *)"");
6742 6740
6743 switch (vcp->vc_type) 6741 switch (vcp->vc_type)
6744 { 6742 {
6745 case CONV_TO_UTF8: /* latin1 to utf-8 conversion */ 6743 case CONV_TO_UTF8: // latin1 to utf-8 conversion
6746 retval = alloc(len * 2 + 1); 6744 retval = alloc(len * 2 + 1);
6747 if (retval == NULL) 6745 if (retval == NULL)
6748 break; 6746 break;
6749 d = retval; 6747 d = retval;
6750 for (i = 0; i < len; ++i) 6748 for (i = 0; i < len; ++i)
6761 *d = NUL; 6759 *d = NUL;
6762 if (lenp != NULL) 6760 if (lenp != NULL)
6763 *lenp = (int)(d - retval); 6761 *lenp = (int)(d - retval);
6764 break; 6762 break;
6765 6763
6766 case CONV_9_TO_UTF8: /* latin9 to utf-8 conversion */ 6764 case CONV_9_TO_UTF8: // latin9 to utf-8 conversion
6767 retval = alloc(len * 3 + 1); 6765 retval = alloc(len * 3 + 1);
6768 if (retval == NULL) 6766 if (retval == NULL)
6769 break; 6767 break;
6770 d = retval; 6768 d = retval;
6771 for (i = 0; i < len; ++i) 6769 for (i = 0; i < len; ++i)
6772 { 6770 {
6773 c = ptr[i]; 6771 c = ptr[i];
6774 switch (c) 6772 switch (c)
6775 { 6773 {
6776 case 0xa4: c = 0x20ac; break; /* euro */ 6774 case 0xa4: c = 0x20ac; break; // euro
6777 case 0xa6: c = 0x0160; break; /* S hat */ 6775 case 0xa6: c = 0x0160; break; // S hat
6778 case 0xa8: c = 0x0161; break; /* S -hat */ 6776 case 0xa8: c = 0x0161; break; // S -hat
6779 case 0xb4: c = 0x017d; break; /* Z hat */ 6777 case 0xb4: c = 0x017d; break; // Z hat
6780 case 0xb8: c = 0x017e; break; /* Z -hat */ 6778 case 0xb8: c = 0x017e; break; // Z -hat
6781 case 0xbc: c = 0x0152; break; /* OE */ 6779 case 0xbc: c = 0x0152; break; // OE
6782 case 0xbd: c = 0x0153; break; /* oe */ 6780 case 0xbd: c = 0x0153; break; // oe
6783 case 0xbe: c = 0x0178; break; /* Y */ 6781 case 0xbe: c = 0x0178; break; // Y
6784 } 6782 }
6785 d += utf_char2bytes(c, d); 6783 d += utf_char2bytes(c, d);
6786 } 6784 }
6787 *d = NUL; 6785 *d = NUL;
6788 if (lenp != NULL) 6786 if (lenp != NULL)
6789 *lenp = (int)(d - retval); 6787 *lenp = (int)(d - retval);
6790 break; 6788 break;
6791 6789
6792 case CONV_TO_LATIN1: /* utf-8 to latin1 conversion */ 6790 case CONV_TO_LATIN1: // utf-8 to latin1 conversion
6793 case CONV_TO_LATIN9: /* utf-8 to latin9 conversion */ 6791 case CONV_TO_LATIN9: // utf-8 to latin9 conversion
6794 retval = alloc(len + 1); 6792 retval = alloc(len + 1);
6795 if (retval == NULL) 6793 if (retval == NULL)
6796 break; 6794 break;
6797 d = retval; 6795 d = retval;
6798 for (i = 0; i < len; ++i) 6796 for (i = 0; i < len; ++i)
6804 { 6802 {
6805 int l_w = utf8len_tab_zero[ptr[i]]; 6803 int l_w = utf8len_tab_zero[ptr[i]];
6806 6804
6807 if (l_w == 0) 6805 if (l_w == 0)
6808 { 6806 {
6809 /* Illegal utf-8 byte cannot be converted */ 6807 // Illegal utf-8 byte cannot be converted
6810 vim_free(retval); 6808 vim_free(retval);
6811 return NULL; 6809 return NULL;
6812 } 6810 }
6813 if (unconvlenp != NULL && l_w > len - i) 6811 if (unconvlenp != NULL && l_w > len - i)
6814 { 6812 {
6815 /* Incomplete sequence at the end. */ 6813 // Incomplete sequence at the end.
6816 *unconvlenp = len - i; 6814 *unconvlenp = len - i;
6817 break; 6815 break;
6818 } 6816 }
6819 *d++ = ptr[i]; 6817 *d++ = ptr[i];
6820 } 6818 }
6822 { 6820 {
6823 c = utf_ptr2char(ptr + i); 6821 c = utf_ptr2char(ptr + i);
6824 if (vcp->vc_type == CONV_TO_LATIN9) 6822 if (vcp->vc_type == CONV_TO_LATIN9)
6825 switch (c) 6823 switch (c)
6826 { 6824 {
6827 case 0x20ac: c = 0xa4; break; /* euro */ 6825 case 0x20ac: c = 0xa4; break; // euro
6828 case 0x0160: c = 0xa6; break; /* S hat */ 6826 case 0x0160: c = 0xa6; break; // S hat
6829 case 0x0161: c = 0xa8; break; /* S -hat */ 6827 case 0x0161: c = 0xa8; break; // S -hat
6830 case 0x017d: c = 0xb4; break; /* Z hat */ 6828 case 0x017d: c = 0xb4; break; // Z hat
6831 case 0x017e: c = 0xb8; break; /* Z -hat */ 6829 case 0x017e: c = 0xb8; break; // Z -hat
6832 case 0x0152: c = 0xbc; break; /* OE */ 6830 case 0x0152: c = 0xbc; break; // OE
6833 case 0x0153: c = 0xbd; break; /* oe */ 6831 case 0x0153: c = 0xbd; break; // oe
6834 case 0x0178: c = 0xbe; break; /* Y */ 6832 case 0x0178: c = 0xbe; break; // Y
6835 case 0xa4: 6833 case 0xa4:
6836 case 0xa6: 6834 case 0xa6:
6837 case 0xa8: 6835 case 0xa8:
6838 case 0xb4: 6836 case 0xb4:
6839 case 0xb8: 6837 case 0xb8:
6840 case 0xbc: 6838 case 0xbc:
6841 case 0xbd: 6839 case 0xbd:
6842 case 0xbe: c = 0x100; break; /* not in latin9 */ 6840 case 0xbe: c = 0x100; break; // not in latin9
6843 } 6841 }
6844 if (!utf_iscomposing(c)) /* skip composing chars */ 6842 if (!utf_iscomposing(c)) // skip composing chars
6845 { 6843 {
6846 if (c < 0x100) 6844 if (c < 0x100)
6847 *d++ = c; 6845 *d++ = c;
6848 else if (vcp->vc_fail) 6846 else if (vcp->vc_fail)
6849 { 6847 {
6886 'u', 'm', unconvlenp); 6884 'u', 'm', unconvlenp);
6887 break; 6885 break;
6888 # endif 6886 # endif
6889 6887
6890 # ifdef USE_ICONV 6888 # ifdef USE_ICONV
6891 case CONV_ICONV: /* conversion with output_conv.vc_fd */ 6889 case CONV_ICONV: // conversion with output_conv.vc_fd
6892 retval = iconv_string(vcp, ptr, len, unconvlenp, lenp); 6890 retval = iconv_string(vcp, ptr, len, unconvlenp, lenp);
6893 break; 6891 break;
6894 # endif 6892 # endif
6895 # ifdef MSWIN 6893 # ifdef MSWIN
6896 case CONV_CODEPAGE: /* codepage -> codepage */ 6894 case CONV_CODEPAGE: // codepage -> codepage
6897 { 6895 {
6898 int retlen; 6896 int retlen;
6899 int tmp_len; 6897 int tmp_len;
6900 short_u *tmp; 6898 short_u *tmp;
6901 6899
6902 /* 1. codepage/UTF-8 -> ucs-2. */ 6900 // 1. codepage/UTF-8 -> ucs-2.
6903 if (vcp->vc_cpfrom == 0) 6901 if (vcp->vc_cpfrom == 0)
6904 tmp_len = utf8_to_utf16(ptr, len, NULL, NULL); 6902 tmp_len = utf8_to_utf16(ptr, len, NULL, NULL);
6905 else 6903 else
6906 { 6904 {
6907 tmp_len = MultiByteToWideChar(vcp->vc_cpfrom, 6905 tmp_len = MultiByteToWideChar(vcp->vc_cpfrom,
6927 utf8_to_utf16(ptr, len, tmp, unconvlenp); 6925 utf8_to_utf16(ptr, len, tmp, unconvlenp);
6928 else 6926 else
6929 MultiByteToWideChar(vcp->vc_cpfrom, 0, 6927 MultiByteToWideChar(vcp->vc_cpfrom, 0,
6930 (char *)ptr, len, tmp, tmp_len); 6928 (char *)ptr, len, tmp, tmp_len);
6931 6929
6932 /* 2. ucs-2 -> codepage/UTF-8. */ 6930 // 2. ucs-2 -> codepage/UTF-8.
6933 if (vcp->vc_cpto == 0) 6931 if (vcp->vc_cpto == 0)
6934 retlen = utf16_to_utf8(tmp, tmp_len, NULL); 6932 retlen = utf16_to_utf8(tmp, tmp_len, NULL);
6935 else 6933 else
6936 retlen = WideCharToMultiByte(vcp->vc_cpto, 0, 6934 retlen = WideCharToMultiByte(vcp->vc_cpto, 0,
6937 tmp, tmp_len, 0, 0, 0, 0); 6935 tmp, tmp_len, 0, 0, 0, 0);