Mercurial > vim
view src/arabic.c @ 32721:94f4a488412e v9.0.1683
Updated runtime files
Commit: https://github.com/vim/vim/commit/6efb1980336ff324e9c57a4e282530b952fca816
Author: Christian Brabandt <cb@256bit.org>
Date: Thu Aug 10 05:44:25 2023 +0200
Updated runtime files
This is a collection of various PRs from github that all require a minor
patch number:
1) https://github.com/vim/vim/pull/12612
Do not conflate dictionary key with end of block
2) https://github.com/vim/vim/pull/12729:
When saving and restoring 'undolevels', the constructs `&undolevels` and
`:set undolevels` are problematic.
The construct `&undolevels` reads an unpredictable value; it will be the
local option value (if one has been set), or the global option value
(otherwise), making it unsuitable for saving a value for later
restoration.
Similarly, if a local option value has been set for 'undolevels',
temporarily modifying the option via `:set undolevels` changes the local
value as well as the global value, requiring extra work to restore both
values.
Saving and restoring the option value in one step via the construct
`:let &undolevels = &undolevels` appears to make no changes to the
'undolevels' option, but if a local option has been set to a different
value than the global option, it has the unintended effect of changing
the global 'undolevels' value to the local value.
Update the documentation to explain these issues and recommend explicit
use of global and local option values when saving and restoring. Update
some unit tests to use `g:undolevels`.
3) https://github.com/vim/vim/pull/12702:
Problem: Pip requirements files are not recognized.
Solution: Add a pattern to match pip requirements files.
4) https://github.com/vim/vim/pull/12688:
Add indent file and tests for ABB Rapid
5) https://github.com/vim/vim/pull/12668:
Use Lua 5.1 numeric escapes in tests and add to CI
Only Lua 5.2+ and LuaJIT understand hexadecimal escapes in strings. Lua
5.1 only supports decimal escapes:
> A character in a string can also be specified by its numerical value
> using the escape sequence \ddd, where ddd is a sequence of up to three
> decimal digits. (Note that if a numerical escape is to be followed by a
> digit, it must be expressed using exactly three digits.) Strings in Lua
> can contain any 8-bit value, including embedded zeros, which can be
> specified as '\0'.
To make sure this works with Lua 5.4 and Lua 5.1 change the Vim CI to
run with Lua 5.1 as well as Lua 5.4
6) https://github.com/vim/vim/pull/12631:
Add hurl filetype detection
7) https://github.com/vim/vim/pull/12573:
Problem: Files for haskell persistent library are not recognized
Solution: Add pattern persistentmodels for haskell persistent library
closes: #12612
closes: #12729
closes: #12702
closes: #12688
closes: #12668
closes: #12631
closes: #12573
Co-authored-by: lacygoill <lacygoill@lacygoill.me>
Co-authored-by: Michael Henry <drmikehenry@drmikehenry.com>
Co-authored-by: ObserverOfTime <chronobserver@disroot.org>
Co-authored-by: KnoP-01 <knosowski@graeffrobotics.de>
Co-authored-by: James McCoy <jamessan@jamessan.com>
Co-authored-by: Jacob Pfeifer <jacob@pfeifer.dev>
Co-authored-by: Borys Lykah <lykahb@fastmail.com>
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Thu, 10 Aug 2023 06:30:06 +0200 |
parents | 7334bf933510 |
children |
line wrap: on
line source
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * arabic.c: functions for Arabic language
 *
 * Author: Nadim Shaikli & Isam Bayazidi
 * Farsi support and restructuring to make adding new letters easier by Ali
 * Gholami Rudi.  Further work by Ameretat Reith.
 */

/*
 * Sorted list of unicode Arabic characters.  Each entry holds the
 * presentation forms of a letter.
 *
 * Arabic characters are categorized into following types:
 *
 * Isolated	- iso-8859-6 form
 * Initial	- unicode form-B start
 * Medial	- unicode form-B middle
 * Final	- unicode form-B final
 * Stand-Alone	- unicode form-B isolated
 */

#include "vim.h"

#if defined(FEAT_ARABIC) || defined(PROTO)

// Unicode values for Arabic characters.
#define a_HAMZA                         0x0621
#define a_ALEF_MADDA                    0x0622
#define a_ALEF_HAMZA_ABOVE              0x0623
#define a_WAW_HAMZA                     0x0624
#define a_ALEF_HAMZA_BELOW              0x0625
#define a_YEH_HAMZA                     0x0626
#define a_ALEF                          0x0627
#define a_BEH                           0x0628
#define a_TEH_MARBUTA                   0x0629
#define a_TEH                           0x062a
#define a_THEH                          0x062b
#define a_JEEM                          0x062c
#define a_HAH                           0x062d
#define a_KHAH                          0x062e
#define a_DAL                           0x062f
#define a_THAL                          0x0630
#define a_REH                           0x0631
#define a_ZAIN                          0x0632
#define a_SEEN                          0x0633
#define a_SHEEN                         0x0634
#define a_SAD                           0x0635
#define a_DAD                           0x0636
#define a_TAH                           0x0637
#define a_ZAH                           0x0638
#define a_AIN                           0x0639
#define a_GHAIN                         0x063a
#define a_TATWEEL                       0x0640
#define a_FEH                           0x0641
#define a_QAF                           0x0642
#define a_KAF                           0x0643
#define a_LAM                           0x0644
#define a_MEEM                          0x0645
#define a_NOON                          0x0646
#define a_HEH                           0x0647
#define a_WAW                           0x0648
#define a_ALEF_MAKSURA                  0x0649
#define a_YEH                           0x064a
#define a_FATHATAN                      0x064b
#define a_DAMMATAN                      0x064c
#define a_KASRATAN                      0x064d
#define a_FATHA                         0x064e
#define a_DAMMA                         0x064f
#define a_KASRA                         0x0650
#define a_SHADDA                        0x0651
#define a_SUKUN                         0x0652
#define a_MADDA_ABOVE                   0x0653
#define a_HAMZA_ABOVE                   0x0654
#define a_HAMZA_BELOW                   0x0655

#define a_PEH                           0x067e
#define a_TCHEH                         0x0686
#define a_JEH                           0x0698
#define a_FKAF                          0x06a9
#define a_GAF                           0x06af
#define a_FYEH                          0x06cc

// Combined LAM + ALEF forms: "s" is used by chg_c_laa2i() (not joined to the
// previous character), "f" by chg_c_laa2f() (joined to the previous one).
#define a_s_LAM_ALEF_MADDA_ABOVE        0xfef5
#define a_f_LAM_ALEF_MADDA_ABOVE        0xfef6
#define a_s_LAM_ALEF_HAMZA_ABOVE        0xfef7
#define a_f_LAM_ALEF_HAMZA_ABOVE        0xfef8
#define a_s_LAM_ALEF_HAMZA_BELOW        0xfef9
#define a_f_LAM_ALEF_HAMZA_BELOW        0xfefa
#define a_s_LAM_ALEF                    0xfefb
#define a_f_LAM_ALEF                    0xfefc

/*
 * Shaping table: for every supported character "c" the code point to use for
 * each presentation form.  A zero form means "no such form"; can_join() uses
 * that to decide whether two characters connect, and arabic_shape() falls
 * back to the original character when the selected form is zero.
 *
 * The entries MUST stay sorted by "c": find_achar() does a binary search.
 * The table is read-only, hence const.
 */
static const struct achar {
    unsigned    c;
    unsigned    isolated;
    unsigned    initial;
    unsigned    medial;
    unsigned    final;
} achars[] = {
    {a_HAMZA, 0xfe80, 0, 0, 0},
    {a_ALEF_MADDA, 0xfe81, 0, 0, 0xfe82},
    {a_ALEF_HAMZA_ABOVE, 0xfe83, 0, 0, 0xfe84},
    {a_WAW_HAMZA, 0xfe85, 0, 0, 0xfe86},
    {a_ALEF_HAMZA_BELOW, 0xfe87, 0, 0, 0xfe88},
    {a_YEH_HAMZA, 0xfe89, 0xfe8b, 0xfe8c, 0xfe8a},
    {a_ALEF, 0xfe8d, 0, 0, 0xfe8e},
    {a_BEH, 0xfe8f, 0xfe91, 0xfe92, 0xfe90},
    {a_TEH_MARBUTA, 0xfe93, 0, 0, 0xfe94},
    {a_TEH, 0xfe95, 0xfe97, 0xfe98, 0xfe96},
    {a_THEH, 0xfe99, 0xfe9b, 0xfe9c, 0xfe9a},
    {a_JEEM, 0xfe9d, 0xfe9f, 0xfea0, 0xfe9e},
    {a_HAH, 0xfea1, 0xfea3, 0xfea4, 0xfea2},
    {a_KHAH, 0xfea5, 0xfea7, 0xfea8, 0xfea6},
    {a_DAL, 0xfea9, 0, 0, 0xfeaa},
    {a_THAL, 0xfeab, 0, 0, 0xfeac},
    {a_REH, 0xfead, 0, 0, 0xfeae},
    {a_ZAIN, 0xfeaf, 0, 0, 0xfeb0},
    {a_SEEN, 0xfeb1, 0xfeb3, 0xfeb4, 0xfeb2},
    {a_SHEEN, 0xfeb5, 0xfeb7, 0xfeb8, 0xfeb6},
    {a_SAD, 0xfeb9, 0xfebb, 0xfebc, 0xfeba},
    {a_DAD, 0xfebd, 0xfebf, 0xfec0, 0xfebe},
    {a_TAH, 0xfec1, 0xfec3, 0xfec4, 0xfec2},
    {a_ZAH, 0xfec5, 0xfec7, 0xfec8, 0xfec6},
    {a_AIN, 0xfec9, 0xfecb, 0xfecc, 0xfeca},
    {a_GHAIN, 0xfecd, 0xfecf, 0xfed0, 0xfece},
    {a_TATWEEL, 0, 0x0640, 0x0640, 0x0640},
    {a_FEH, 0xfed1, 0xfed3, 0xfed4, 0xfed2},
    {a_QAF, 0xfed5, 0xfed7, 0xfed8, 0xfed6},
    {a_KAF, 0xfed9, 0xfedb, 0xfedc, 0xfeda},
    {a_LAM, 0xfedd, 0xfedf, 0xfee0, 0xfede},
    {a_MEEM, 0xfee1, 0xfee3, 0xfee4, 0xfee2},
    {a_NOON, 0xfee5, 0xfee7, 0xfee8, 0xfee6},
    {a_HEH, 0xfee9, 0xfeeb, 0xfeec, 0xfeea},
    {a_WAW, 0xfeed, 0, 0, 0xfeee},
    {a_ALEF_MAKSURA, 0xfeef, 0, 0, 0xfef0},
    {a_YEH, 0xfef1, 0xfef3, 0xfef4, 0xfef2},
    {a_FATHATAN, 0xfe70, 0, 0, 0},
    {a_DAMMATAN, 0xfe72, 0, 0, 0},
    {a_KASRATAN, 0xfe74, 0, 0, 0},
    {a_FATHA, 0xfe76, 0, 0xfe77, 0},
    {a_DAMMA, 0xfe78, 0, 0xfe79, 0},
    {a_KASRA, 0xfe7a, 0, 0xfe7b, 0},
    // NOTE(review): the medial form below equals the isolated form, unlike
    // the neighbouring rows (isolated + 1).  The Unicode chart appears to
    // list U+FE7D as the medial SHADDA - confirm whether 0xfe7c is intended.
    {a_SHADDA, 0xfe7c, 0, 0xfe7c, 0},
    {a_SUKUN, 0xfe7e, 0, 0xfe7f, 0},
    {a_MADDA_ABOVE, 0, 0, 0, 0},
    {a_HAMZA_ABOVE, 0, 0, 0, 0},
    {a_HAMZA_BELOW, 0, 0, 0, 0},
    {a_PEH, 0xfb56, 0xfb58, 0xfb59, 0xfb57},
    {a_TCHEH, 0xfb7a, 0xfb7c, 0xfb7d, 0xfb7b},
    {a_JEH, 0xfb8a, 0, 0, 0xfb8b},
    {a_FKAF, 0xfb8e, 0xfb90, 0xfb91, 0xfb8f},
    {a_GAF, 0xfb92, 0xfb94, 0xfb95, 0xfb93},
    {a_FYEH, 0xfbfc, 0xfbfe, 0xfbff, 0xfbfd},
};

// Accepted by A_is_ok() although it has no entry in achars[].
#define a_BYTE_ORDER_MARK               0xfeff

/*
 * Find the struct achar pointer to the given Arabic char.
 * Returns NULL if "c" has no entry in achars[].
 */
    static const struct achar *
find_achar(int c)
{
    unsigned    key = (unsigned)c;
    int         l = 0;
    int         h = (int)ARRAY_LENGTH(achars);

    // Binary search over the half-open range [l, h); relies on achars[]
    // being sorted by "c".
    while (l < h)
    {
        int m = l + (h - l) / 2;

        if (achars[m].c == key)
            return &achars[m];
        if (key < achars[m].c)
            h = m;
        else
            l = m + 1;
    }
    return NULL;
}

/*
 * Change shape - from Combination (2 char) to an Isolated.
 * "hid_c" is the ALEF variant that follows a LAM.  Returns the matching
 * a_s_LAM_ALEF* code point, or zero when "hid_c" is not such a variant.
 */
    static int
chg_c_laa2i(int hid_c)
{
    int tempc;

    switch (hid_c)
    {
        case a_ALEF_MADDA:
            tempc = a_s_LAM_ALEF_MADDA_ABOVE;
            break;
        case a_ALEF_HAMZA_ABOVE:
            tempc = a_s_LAM_ALEF_HAMZA_ABOVE;
            break;
        case a_ALEF_HAMZA_BELOW:
            tempc = a_s_LAM_ALEF_HAMZA_BELOW;
            break;
        case a_ALEF:
            tempc = a_s_LAM_ALEF;
            break;
        default:
            tempc = 0;
    }

    return tempc;
}

/*
 * Change shape - from Combination-Isolated to Final.
 * "hid_c" is the ALEF variant that follows a LAM.  Returns the matching
 * a_f_LAM_ALEF* code point, or zero when "hid_c" is not such a variant.
 */
    static int
chg_c_laa2f(int hid_c)
{
    int tempc;

    switch (hid_c)
    {
        case a_ALEF_MADDA:
            tempc = a_f_LAM_ALEF_MADDA_ABOVE;
            break;
        case a_ALEF_HAMZA_ABOVE:
            tempc = a_f_LAM_ALEF_HAMZA_ABOVE;
            break;
        case a_ALEF_HAMZA_BELOW:
            tempc = a_f_LAM_ALEF_HAMZA_BELOW;
            break;
        case a_ALEF:
            tempc = a_f_LAM_ALEF;
            break;
        default:
            tempc = 0;
    }

    return tempc;
}

/*
 * Returns whether it is possible to join the given letters: both must be in
 * achars[], "c1" must have an initial or medial form and "c2" must have a
 * final or medial form.
 */
    static int
can_join(int c1, int c2)
{
    const struct achar *a1 = find_achar(c1);
    const struct achar *a2 = find_achar(c2);

    return a1 && a2 && (a1->initial || a1->medial)
                    && (a2->final || a2->medial);
}

/*
 * Check whether we are dealing with a character that could be regarded as an
 * Arabic combining character, need to check the character before this.
 * Always FALSE unless 'arabicshape' is set and 'termbidi' is off.
 */
    int
arabic_maycombine(int two)
{
    if (p_arshape && !p_tbidi)
        return (two == a_ALEF_MADDA
                || two == a_ALEF_HAMZA_ABOVE
                || two == a_ALEF_HAMZA_BELOW
                || two == a_ALEF);
    return FALSE;
}

/*
 * Check whether we are dealing with Arabic combining characters.
 * Note: these are NOT really composing characters!
 */
    int
arabic_combine(
        int one,            // first character
        int two)            // character just after "one"
{
    if (one == a_LAM)
        return arabic_maycombine(two);
    return FALSE;
}

/*
 * A_is_iso returns true if 'c' has an entry in achars[].
 */
    static int
A_is_iso(int c)
{
    return find_achar(c) != NULL;
}

/*
 * A_is_ok returns true if 'c' has an entry in achars[] or is the byte order
 * mark.  Note that the byte order mark itself is NOT in achars[].
 */
    static int
A_is_ok(int c)
{
    return (A_is_iso(c) || c == a_BYTE_ORDER_MARK);
}

/*
 * A_is_valid returns true if 'c' passes A_is_ok() and is not a_HAMZA.
 */
    static int
A_is_valid(int c)
{
    return (A_is_ok(c) && c != a_HAMZA);
}

/*
 * Do Arabic shaping on character "c".  Returns the shaped character, or "c"
 * itself when it is not handled or the selected form is missing.
 * out:    "ccp" points to the first byte of the character to be shaped; when
 *         the shape changes it is set to the first byte of the encoded
 *         result.  May be NULL.
 * in/out: "c1p" points to the first composing char for "c"; it is cleared
 *         when it was merged with a LAM.  Must not be NULL.
 * in:     "prev_c" is the previous character (not shaped)
 * in:     "prev_c1" is the first composing char for the previous char
 *         (not shaped)
 * in:     "next_c" is the next character (not shaped).
 */
    int
arabic_shape(
    int         c,
    int         *ccp,
    int         *c1p,
    int         prev_c,
    int         prev_c1,
    int         next_c)
{
    int         curr_c;
    int         curr_laa;
    int         prev_laa;

    // Deal only with Arabic characters, pass back all others
    if (!A_is_ok(c))
        return c;

    curr_laa = arabic_combine(c, *c1p);
    prev_laa = arabic_combine(prev_c, prev_c1);

    if (curr_laa)
    {
        // LAM followed by an ALEF variant: replace the pair by one ligature.
        if (A_is_valid(prev_c) && can_join(prev_c, a_LAM) && !prev_laa)
            curr_c = chg_c_laa2f(*c1p);
        else
            curr_c = chg_c_laa2i(*c1p);

        // Remove the composing character
        *c1p = 0;
    }
    else
    {
        const struct achar *curr_a = find_achar(c);
        int                 backward_combine;
        int                 forward_combine;

        // A_is_ok() also accepts a_BYTE_ORDER_MARK, which has no entry in
        // achars[].  Without this check both can_join() calls below return
        // zero and "curr_a->isolated" dereferences a NULL pointer.
        if (curr_a == NULL)
            return c;

        // A previous LAM-ALEF ligature does not connect to what follows it.
        backward_combine = !prev_laa && can_join(prev_c, c);
        forward_combine = can_join(c, next_c);

        if (backward_combine)
        {
            if (forward_combine)
                curr_c = curr_a->medial;
            else
                curr_c = curr_a->final;
        }
        else
        {
            if (forward_combine)
                curr_c = curr_a->initial;
            else
                curr_c = curr_a->isolated;
        }
    }

    // Character missing from the table means using original character.
    if (curr_c == NUL)
        curr_c = c;

    if (curr_c != c && ccp != NULL)
    {
        char_u buf[MB_MAXBYTES + 1];

        // Update the first byte of the character.
        (*mb_char2bytes)(curr_c, buf);
        *ccp = buf[0];
    }

    // Return the shaped character
    return curr_c;
}
#endif // FEAT_ARABIC