comparison src/libvterm/src/unicode.c @ 20518:a4652d7ec99f v8.2.0813

patch 8.2.0813: libvterm code is slightly different from upstream
Commit: https://github.com/vim/vim/commit/591cec8366e87a172495c362477cbf5de8d399f0
Author: Bram Moolenaar <Bram@vim.org>
Date: Fri May 22 22:06:06 2020 +0200
patch 8.2.0813: libvterm code is slightly different from upstream
Problem: libvterm code is slightly different from upstream.
Solution: Use upstream text to avoid future merge problems. Mainly comment style changes.
author Bram Moolenaar <Bram@vim.org>
date Fri, 22 May 2020 22:15:04 +0200
parents e02d45e302a2
children 82336c3b679d
comparison
equal deleted inserted replaced
20517:a7c6cd0d7ba0 20518:a4652d7ec99f
1 #include "vterm_internal.h" 1 #include "vterm_internal.h"
2 2
3 /* ### The following from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c 3 // ### The following from http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
4 * With modifications: 4 // With modifications:
5 * made functions static 5 // made functions static
6 * moved 'combining' table to file scope, so other functions can see it 6 // moved 'combining' table to file scope, so other functions can see it
7 * ################################################################### 7 // ###################################################################
8 */
9 8
10 /* 9 /*
11 * This is an implementation of wcwidth() and wcswidth() (defined in 10 * This is an implementation of wcwidth() and wcswidth() (defined in
12 * IEEE Std 1003.1-2001) for Unicode. 11 * IEEE Std 1003.1-2001) for Unicode.
13 * 12 *
73 int last; 72 int last;
74 }; 73 };
75 74
76 #if !defined(WCWIDTH_FUNCTION) || !defined(IS_COMBINING_FUNCTION) 75 #if !defined(WCWIDTH_FUNCTION) || !defined(IS_COMBINING_FUNCTION)
77 76
78 // sorted list of non-overlapping intervals of non-spacing characters 77 /* sorted list of non-overlapping intervals of non-spacing characters */
79 // generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" 78 /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
80 // Replaced by the combining table from Vim. 79 // Replaced by the combining table from Vim.
81 static const struct interval combining[] = { 80 static const struct interval combining[] = {
82 {0X0300, 0X036F}, 81 {0X0300, 0X036F},
83 {0X0483, 0X0489}, 82 {0X0483, 0X0489},
84 {0X0591, 0X05BD}, 83 {0X0591, 0X05BD},
360 {0X1E944, 0X1E94A}, 359 {0X1E944, 0X1E94A},
361 {0XE0100, 0XE01EF} 360 {0XE0100, 0XE01EF}
362 }; 361 };
363 #endif 362 #endif
364 363
365 // auxiliary function for binary search in interval table 364 /* auxiliary function for binary search in interval table */
366 static int bisearch(uint32_t ucs, const struct interval *table, int max) { 365 static int bisearch(uint32_t ucs, const struct interval *table, int max) {
367 int min = 0; 366 int min = 0;
368 int mid; 367 int mid;
369 368
370 if ((int)ucs < table[0].first || (int)ucs > table[max].last) 369 if ((int)ucs < table[0].first || (int)ucs > table[max].last)
380 } 379 }
381 380
382 return 0; 381 return 0;
383 } 382 }
384 383
384
385 /* The following two functions define the column width of an ISO 10646 385 /* The following two functions define the column width of an ISO 10646
386 * character as follows: 386 * character as follows:
387 * 387 *
388 * - The null character (U+0000) has a column width of 0. 388 * - The null character (U+0000) has a column width of 0.
389 * 389 *
420 #else 420 #else
421 # define WCWIDTH_FUNCTION mk_wcwidth 421 # define WCWIDTH_FUNCTION mk_wcwidth
422 422
423 static int mk_wcwidth(uint32_t ucs) 423 static int mk_wcwidth(uint32_t ucs)
424 { 424 {
425 // test for 8-bit control characters 425 /* test for 8-bit control characters */
426 if (ucs == 0) 426 if (ucs == 0)
427 return 0; 427 return 0;
428 if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) 428 if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
429 return -1; 429 return -1;
430 430
431 // binary search in table of non-spacing characters 431 /* binary search in table of non-spacing characters */
432 if (bisearch(ucs, combining, 432 if (bisearch(ucs, combining,
433 sizeof(combining) / sizeof(struct interval) - 1)) 433 sizeof(combining) / sizeof(struct interval) - 1))
434 return 0; 434 return 0;
435 435
436 // if we arrive here, ucs is not a combining or C0/C1 control character 436 /* if we arrive here, ucs is not a combining or C0/C1 control character */
437 437
438 return 1 + 438 return 1 +
439 (ucs >= 0x1100 && 439 (ucs >= 0x1100 &&
440 (ucs <= 0x115f || // Hangul Jamo init. consonants 440 (ucs <= 0x115f || /* Hangul Jamo init. consonants */
441 ucs == 0x2329 || ucs == 0x232a || 441 ucs == 0x2329 || ucs == 0x232a ||
442 (ucs >= 0x2e80 && ucs <= 0xa4cf && 442 (ucs >= 0x2e80 && ucs <= 0xa4cf &&
443 ucs != 0x303f) || // CJK ... Yi 443 ucs != 0x303f) || /* CJK ... Yi */
444 (ucs >= 0xac00 && ucs <= 0xd7a3) || // Hangul Syllables 444 (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
445 (ucs >= 0xf900 && ucs <= 0xfaff) || // CJK Compatibility Ideographs 445 (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
446 (ucs >= 0xfe10 && ucs <= 0xfe19) || // Vertical forms 446 (ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */
447 (ucs >= 0xfe30 && ucs <= 0xfe6f) || // CJK Compatibility Forms 447 (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
448 (ucs >= 0xff00 && ucs <= 0xff60) || // Fullwidth Forms 448 (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */
449 (ucs >= 0xffe0 && ucs <= 0xffe6) || 449 (ucs >= 0xffe0 && ucs <= 0xffe6) ||
450 (ucs >= 0x20000 && ucs <= 0x2fffd) || 450 (ucs >= 0x20000 && ucs <= 0x2fffd) ||
451 (ucs >= 0x30000 && ucs <= 0x3fffd))); 451 (ucs >= 0x30000 && ucs <= 0x3fffd)));
452 } 452 }
453 #endif 453 #endif
477 * otherwise recommended for general use. 477 * otherwise recommended for general use.
478 */ 478 */
479 static int mk_wcwidth_cjk(uint32_t ucs) 479 static int mk_wcwidth_cjk(uint32_t ucs)
480 { 480 {
481 #endif 481 #endif
482 // sorted list of non-overlapping intervals of East Asian Ambiguous 482 /* sorted list of non-overlapping intervals of East Asian Ambiguous
483 // characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" 483 * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */
484 static const struct interval ambiguous[] = { 484 static const struct interval ambiguous[] = {
485 { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 }, 485 { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 },
486 { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 }, 486 { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 },
487 { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 }, 487 { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 },
488 { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 }, 488 { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 },
535 { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF }, 535 { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF },
536 { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD } 536 { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD }
537 }; 537 };
538 #if 0 538 #if 0
539 539
540 // binary search in table of East Asian Ambiguous characters 540 /* binary search in table of East Asian Ambiguous characters */
541 if (bisearch(ucs, ambiguous, 541 if (bisearch(ucs, ambiguous,
542 sizeof(ambiguous) / sizeof(struct interval) - 1)) 542 sizeof(ambiguous) / sizeof(struct interval) - 1))
543 return 2; 543 return 2;
544 544
545 return mk_wcwidth(ucs); 545 return mk_wcwidth(ucs);
546 } 546 }
547
547 548
548 static int mk_wcswidth_cjk(const uint32_t *pwcs, size_t n) 549 static int mk_wcswidth_cjk(const uint32_t *pwcs, size_t n)
549 { 550 {
550 int w, width = 0; 551 int w, width = 0;
551 552