7
|
/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
|
|
9
|
|
/*
 * arabic.c: functions for Arabic language
 *
 * Included by main.c, when FEAT_ARABIC & FEAT_GUI is defined.
 *
 * --
 *
 * Author: Nadim Shaikli & Isam Bayazidi
 *
 */
|
|
20
|
|
21 static int A_is_a __ARGS((int cur_c));
|
|
22 static int A_is_s __ARGS((int cur_c));
|
|
23 static int A_is_f __ARGS((int cur_c));
|
|
24 static int chg_c_a2s __ARGS((int cur_c));
|
|
25 static int chg_c_a2i __ARGS((int cur_c));
|
|
26 static int chg_c_a2m __ARGS((int cur_c));
|
|
27 static int chg_c_a2f __ARGS((int cur_c));
|
|
28 static int chg_c_i2m __ARGS((int cur_c));
|
|
29 static int chg_c_f2m __ARGS((int cur_c));
|
|
30 static int chg_c_laa2i __ARGS((int hid_c));
|
|
31 static int chg_c_laa2f __ARGS((int hid_c));
|
|
32 static int half_shape __ARGS((int c));
|
|
33 static int A_firstc_laa __ARGS((int c1, int c));
|
|
34 static int A_is_harakat __ARGS((int c));
|
|
35 static int A_is_iso __ARGS((int c));
|
|
36 static int A_is_formb __ARGS((int c));
|
|
37 static int A_is_ok __ARGS((int c));
|
|
38 static int A_is_valid __ARGS((int c));
|
|
39 static int A_is_special __ARGS((int c));
|
|
40
|
|
41
|
|
42 /*
|
|
43 * Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered)
|
|
44 */
|
|
45 static int
|
|
46 A_is_a(cur_c)
|
|
47 int cur_c;
|
|
48 {
|
|
49 switch (cur_c)
|
|
50 {
|
|
51 case a_HAMZA:
|
|
52 case a_ALEF_MADDA:
|
|
53 case a_ALEF_HAMZA_ABOVE:
|
|
54 case a_WAW_HAMZA:
|
|
55 case a_ALEF_HAMZA_BELOW:
|
|
56 case a_YEH_HAMZA:
|
|
57 case a_ALEF:
|
|
58 case a_BEH:
|
|
59 case a_TEH_MARBUTA:
|
|
60 case a_TEH:
|
|
61 case a_THEH:
|
|
62 case a_JEEM:
|
|
63 case a_HAH:
|
|
64 case a_KHAH:
|
|
65 case a_DAL:
|
|
66 case a_THAL:
|
|
67 case a_REH:
|
|
68 case a_ZAIN:
|
|
69 case a_SEEN:
|
|
70 case a_SHEEN:
|
|
71 case a_SAD:
|
|
72 case a_DAD:
|
|
73 case a_TAH:
|
|
74 case a_ZAH:
|
|
75 case a_AIN:
|
|
76 case a_GHAIN:
|
|
77 case a_TATWEEL:
|
|
78 case a_FEH:
|
|
79 case a_QAF:
|
|
80 case a_KAF:
|
|
81 case a_LAM:
|
|
82 case a_MEEM:
|
|
83 case a_NOON:
|
|
84 case a_HEH:
|
|
85 case a_WAW:
|
|
86 case a_ALEF_MAKSURA:
|
|
87 case a_YEH:
|
|
88 return TRUE;
|
|
89 }
|
|
90
|
|
91 return FALSE;
|
|
92 }
|
|
93
|
|
94
|
|
95 /*
|
|
96 * Returns True if c is an Isolated Form-B ARABIC letter
|
|
97 */
|
|
98 static int
|
|
99 A_is_s(cur_c)
|
|
100 int cur_c;
|
|
101 {
|
|
102 switch (cur_c)
|
|
103 {
|
|
104 case a_s_HAMZA:
|
|
105 case a_s_ALEF_MADDA:
|
|
106 case a_s_ALEF_HAMZA_ABOVE:
|
|
107 case a_s_WAW_HAMZA:
|
|
108 case a_s_ALEF_HAMZA_BELOW:
|
|
109 case a_s_YEH_HAMZA:
|
|
110 case a_s_ALEF:
|
|
111 case a_s_BEH:
|
|
112 case a_s_TEH_MARBUTA:
|
|
113 case a_s_TEH:
|
|
114 case a_s_THEH:
|
|
115 case a_s_JEEM:
|
|
116 case a_s_HAH:
|
|
117 case a_s_KHAH:
|
|
118 case a_s_DAL:
|
|
119 case a_s_THAL:
|
|
120 case a_s_REH:
|
|
121 case a_s_ZAIN:
|
|
122 case a_s_SEEN:
|
|
123 case a_s_SHEEN:
|
|
124 case a_s_SAD:
|
|
125 case a_s_DAD:
|
|
126 case a_s_TAH:
|
|
127 case a_s_ZAH:
|
|
128 case a_s_AIN:
|
|
129 case a_s_GHAIN:
|
|
130 case a_s_FEH:
|
|
131 case a_s_QAF:
|
|
132 case a_s_KAF:
|
|
133 case a_s_LAM:
|
|
134 case a_s_MEEM:
|
|
135 case a_s_NOON:
|
|
136 case a_s_HEH:
|
|
137 case a_s_WAW:
|
|
138 case a_s_ALEF_MAKSURA:
|
|
139 case a_s_YEH:
|
|
140 return TRUE;
|
|
141 }
|
|
142
|
|
143 return FALSE;
|
|
144 }
|
|
145
|
|
146
|
|
147 /*
|
|
148 * Returns True if c is a Final shape of an ARABIC letter
|
|
149 */
|
|
150 static int
|
|
151 A_is_f(cur_c)
|
|
152 int cur_c;
|
|
153 {
|
|
154 switch (cur_c)
|
|
155 {
|
|
156 case a_f_ALEF_MADDA:
|
|
157 case a_f_ALEF_HAMZA_ABOVE:
|
|
158 case a_f_WAW_HAMZA:
|
|
159 case a_f_ALEF_HAMZA_BELOW:
|
|
160 case a_f_YEH_HAMZA:
|
|
161 case a_f_ALEF:
|
|
162 case a_f_BEH:
|
|
163 case a_f_TEH_MARBUTA:
|
|
164 case a_f_TEH:
|
|
165 case a_f_THEH:
|
|
166 case a_f_JEEM:
|
|
167 case a_f_HAH:
|
|
168 case a_f_KHAH:
|
|
169 case a_f_DAL:
|
|
170 case a_f_THAL:
|
|
171 case a_f_REH:
|
|
172 case a_f_ZAIN:
|
|
173 case a_f_SEEN:
|
|
174 case a_f_SHEEN:
|
|
175 case a_f_SAD:
|
|
176 case a_f_DAD:
|
|
177 case a_f_TAH:
|
|
178 case a_f_ZAH:
|
|
179 case a_f_AIN:
|
|
180 case a_f_GHAIN:
|
|
181 case a_f_FEH:
|
|
182 case a_f_QAF:
|
|
183 case a_f_KAF:
|
|
184 case a_f_LAM:
|
|
185 case a_f_MEEM:
|
|
186 case a_f_NOON:
|
|
187 case a_f_HEH:
|
|
188 case a_f_WAW:
|
|
189 case a_f_ALEF_MAKSURA:
|
|
190 case a_f_YEH:
|
|
191 case a_f_LAM_ALEF_MADDA_ABOVE:
|
|
192 case a_f_LAM_ALEF_HAMZA_ABOVE:
|
|
193 case a_f_LAM_ALEF_HAMZA_BELOW:
|
|
194 case a_f_LAM_ALEF:
|
|
195 return TRUE;
|
|
196 }
|
|
197 return FALSE;
|
|
198 }
|
|
199
|
|
200
|
|
201 /*
|
|
202 * Change shape - from ISO-8859-6/Isolated to Form-B Isolated
|
|
203 */
|
|
204 static int
|
|
205 chg_c_a2s(cur_c)
|
|
206 int cur_c;
|
|
207 {
|
|
208 int tempc;
|
|
209
|
|
210 switch (cur_c)
|
|
211 {
|
|
212 case a_HAMZA:
|
|
213 tempc = a_s_HAMZA;
|
|
214 break;
|
|
215 case a_ALEF_MADDA:
|
|
216 tempc = a_s_ALEF_MADDA;
|
|
217 break;
|
|
218 case a_ALEF_HAMZA_ABOVE:
|
|
219 tempc = a_s_ALEF_HAMZA_ABOVE;
|
|
220 break;
|
|
221 case a_WAW_HAMZA:
|
|
222 tempc = a_s_WAW_HAMZA;
|
|
223 break;
|
|
224 case a_ALEF_HAMZA_BELOW:
|
|
225 tempc = a_s_ALEF_HAMZA_BELOW;
|
|
226 break;
|
|
227 case a_YEH_HAMZA:
|
|
228 tempc = a_s_YEH_HAMZA;
|
|
229 break;
|
|
230 case a_ALEF:
|
|
231 tempc = a_s_ALEF;
|
|
232 break;
|
|
233 case a_TEH_MARBUTA:
|
|
234 tempc = a_s_TEH_MARBUTA;
|
|
235 break;
|
|
236 case a_DAL:
|
|
237 tempc = a_s_DAL;
|
|
238 break;
|
|
239 case a_THAL:
|
|
240 tempc = a_s_THAL;
|
|
241 break;
|
|
242 case a_REH:
|
|
243 tempc = a_s_REH;
|
|
244 break;
|
|
245 case a_ZAIN:
|
|
246 tempc = a_s_ZAIN;
|
|
247 break;
|
|
248 case a_TATWEEL: /* exceptions */
|
|
249 tempc = cur_c;
|
|
250 break;
|
|
251 case a_WAW:
|
|
252 tempc = a_s_WAW;
|
|
253 break;
|
|
254 case a_ALEF_MAKSURA:
|
|
255 tempc = a_s_ALEF_MAKSURA;
|
|
256 break;
|
|
257 case a_BEH:
|
|
258 tempc = a_s_BEH;
|
|
259 break;
|
|
260 case a_TEH:
|
|
261 tempc = a_s_TEH;
|
|
262 break;
|
|
263 case a_THEH:
|
|
264 tempc = a_s_THEH;
|
|
265 break;
|
|
266 case a_JEEM:
|
|
267 tempc = a_s_JEEM;
|
|
268 break;
|
|
269 case a_HAH:
|
|
270 tempc = a_s_HAH;
|
|
271 break;
|
|
272 case a_KHAH:
|
|
273 tempc = a_s_KHAH;
|
|
274 break;
|
|
275 case a_SEEN:
|
|
276 tempc = a_s_SEEN;
|
|
277 break;
|
|
278 case a_SHEEN:
|
|
279 tempc = a_s_SHEEN;
|
|
280 break;
|
|
281 case a_SAD:
|
|
282 tempc = a_s_SAD;
|
|
283 break;
|
|
284 case a_DAD:
|
|
285 tempc = a_s_DAD;
|
|
286 break;
|
|
287 case a_TAH:
|
|
288 tempc = a_s_TAH;
|
|
289 break;
|
|
290 case a_ZAH:
|
|
291 tempc = a_s_ZAH;
|
|
292 break;
|
|
293 case a_AIN:
|
|
294 tempc = a_s_AIN;
|
|
295 break;
|
|
296 case a_GHAIN:
|
|
297 tempc = a_s_GHAIN;
|
|
298 break;
|
|
299 case a_FEH:
|
|
300 tempc = a_s_FEH;
|
|
301 break;
|
|
302 case a_QAF:
|
|
303 tempc = a_s_QAF;
|
|
304 break;
|
|
305 case a_KAF:
|
|
306 tempc = a_s_KAF;
|
|
307 break;
|
|
308 case a_LAM:
|
|
309 tempc = a_s_LAM;
|
|
310 break;
|
|
311 case a_MEEM:
|
|
312 tempc = a_s_MEEM;
|
|
313 break;
|
|
314 case a_NOON:
|
|
315 tempc = a_s_NOON;
|
|
316 break;
|
|
317 case a_HEH:
|
|
318 tempc = a_s_HEH;
|
|
319 break;
|
|
320 case a_YEH:
|
|
321 tempc = a_s_YEH;
|
|
322 break;
|
|
323 default:
|
|
324 tempc = 0;
|
|
325 }
|
|
326
|
|
327 return tempc;
|
|
328 }
|
|
329
|
|
330
|
|
331 /*
|
|
332 * Change shape - from ISO-8859-6/Isolated to Initial
|
|
333 */
|
|
334 static int
|
|
335 chg_c_a2i(cur_c)
|
|
336 int cur_c;
|
|
337 {
|
|
338 int tempc;
|
|
339
|
|
340 switch (cur_c)
|
|
341 {
|
|
342 case a_YEH_HAMZA:
|
|
343 tempc = a_i_YEH_HAMZA;
|
|
344 break;
|
|
345 case a_HAMZA: /* exceptions */
|
|
346 tempc = a_s_HAMZA;
|
|
347 break;
|
|
348 case a_ALEF_MADDA: /* exceptions */
|
|
349 tempc = a_s_ALEF_MADDA;
|
|
350 break;
|
|
351 case a_ALEF_HAMZA_ABOVE: /* exceptions */
|
|
352 tempc = a_s_ALEF_HAMZA_ABOVE;
|
|
353 break;
|
|
354 case a_WAW_HAMZA: /* exceptions */
|
|
355 tempc = a_s_WAW_HAMZA;
|
|
356 break;
|
|
357 case a_ALEF_HAMZA_BELOW: /* exceptions */
|
|
358 tempc = a_s_ALEF_HAMZA_BELOW;
|
|
359 break;
|
|
360 case a_ALEF: /* exceptions */
|
|
361 tempc = a_s_ALEF;
|
|
362 break;
|
|
363 case a_TEH_MARBUTA: /* exceptions */
|
|
364 tempc = a_s_TEH_MARBUTA;
|
|
365 break;
|
|
366 case a_DAL: /* exceptions */
|
|
367 tempc = a_s_DAL;
|
|
368 break;
|
|
369 case a_THAL: /* exceptions */
|
|
370 tempc = a_s_THAL;
|
|
371 break;
|
|
372 case a_REH: /* exceptions */
|
|
373 tempc = a_s_REH;
|
|
374 break;
|
|
375 case a_ZAIN: /* exceptions */
|
|
376 tempc = a_s_ZAIN;
|
|
377 break;
|
|
378 case a_TATWEEL: /* exceptions */
|
|
379 tempc = cur_c;
|
|
380 break;
|
|
381 case a_WAW: /* exceptions */
|
|
382 tempc = a_s_WAW;
|
|
383 break;
|
|
384 case a_ALEF_MAKSURA: /* exceptions */
|
|
385 tempc = a_s_ALEF_MAKSURA;
|
|
386 break;
|
|
387 case a_BEH:
|
|
388 tempc = a_i_BEH;
|
|
389 break;
|
|
390 case a_TEH:
|
|
391 tempc = a_i_TEH;
|
|
392 break;
|
|
393 case a_THEH:
|
|
394 tempc = a_i_THEH;
|
|
395 break;
|
|
396 case a_JEEM:
|
|
397 tempc = a_i_JEEM;
|
|
398 break;
|
|
399 case a_HAH:
|
|
400 tempc = a_i_HAH;
|
|
401 break;
|
|
402 case a_KHAH:
|
|
403 tempc = a_i_KHAH;
|
|
404 break;
|
|
405 case a_SEEN:
|
|
406 tempc = a_i_SEEN;
|
|
407 break;
|
|
408 case a_SHEEN:
|
|
409 tempc = a_i_SHEEN;
|
|
410 break;
|
|
411 case a_SAD:
|
|
412 tempc = a_i_SAD;
|
|
413 break;
|
|
414 case a_DAD:
|
|
415 tempc = a_i_DAD;
|
|
416 break;
|
|
417 case a_TAH:
|
|
418 tempc = a_i_TAH;
|
|
419 break;
|
|
420 case a_ZAH:
|
|
421 tempc = a_i_ZAH;
|
|
422 break;
|
|
423 case a_AIN:
|
|
424 tempc = a_i_AIN;
|
|
425 break;
|
|
426 case a_GHAIN:
|
|
427 tempc = a_i_GHAIN;
|
|
428 break;
|
|
429 case a_FEH:
|
|
430 tempc = a_i_FEH;
|
|
431 break;
|
|
432 case a_QAF:
|
|
433 tempc = a_i_QAF;
|
|
434 break;
|
|
435 case a_KAF:
|
|
436 tempc = a_i_KAF;
|
|
437 break;
|
|
438 case a_LAM:
|
|
439 tempc = a_i_LAM;
|
|
440 break;
|
|
441 case a_MEEM:
|
|
442 tempc = a_i_MEEM;
|
|
443 break;
|
|
444 case a_NOON:
|
|
445 tempc = a_i_NOON;
|
|
446 break;
|
|
447 case a_HEH:
|
|
448 tempc = a_i_HEH;
|
|
449 break;
|
|
450 case a_YEH:
|
|
451 tempc = a_i_YEH;
|
|
452 break;
|
|
453 default:
|
|
454 tempc = 0;
|
|
455 }
|
|
456
|
|
457 return tempc;
|
|
458 }
|
|
459
|
|
460
|
|
461 /*
|
|
462 * Change shape - from ISO-8859-6/Isolated to Medial
|
|
463 */
|
|
464 static int
|
|
465 chg_c_a2m(cur_c)
|
|
466 int cur_c;
|
|
467 {
|
|
468 int tempc;
|
|
469
|
|
470 switch (cur_c)
|
|
471 {
|
|
472 case a_HAMZA: /* exception */
|
|
473 tempc = a_s_HAMZA;
|
|
474 break;
|
|
475 case a_ALEF_MADDA: /* exception */
|
|
476 tempc = a_f_ALEF_MADDA;
|
|
477 break;
|
|
478 case a_ALEF_HAMZA_ABOVE: /* exception */
|
|
479 tempc = a_f_ALEF_HAMZA_ABOVE;
|
|
480 break;
|
|
481 case a_WAW_HAMZA: /* exception */
|
|
482 tempc = a_f_WAW_HAMZA;
|
|
483 break;
|
|
484 case a_ALEF_HAMZA_BELOW: /* exception */
|
|
485 tempc = a_f_ALEF_HAMZA_BELOW;
|
|
486 break;
|
|
487 case a_YEH_HAMZA:
|
|
488 tempc = a_m_YEH_HAMZA;
|
|
489 break;
|
|
490 case a_ALEF: /* exception */
|
|
491 tempc = a_f_ALEF;
|
|
492 break;
|
|
493 case a_BEH:
|
|
494 tempc = a_m_BEH;
|
|
495 break;
|
|
496 case a_TEH_MARBUTA: /* exception */
|
|
497 tempc = a_f_TEH_MARBUTA;
|
|
498 break;
|
|
499 case a_TEH:
|
|
500 tempc = a_m_TEH;
|
|
501 break;
|
|
502 case a_THEH:
|
|
503 tempc = a_m_THEH;
|
|
504 break;
|
|
505 case a_JEEM:
|
|
506 tempc = a_m_JEEM;
|
|
507 break;
|
|
508 case a_HAH:
|
|
509 tempc = a_m_HAH;
|
|
510 break;
|
|
511 case a_KHAH:
|
|
512 tempc = a_m_KHAH;
|
|
513 break;
|
|
514 case a_DAL: /* exception */
|
|
515 tempc = a_f_DAL;
|
|
516 break;
|
|
517 case a_THAL: /* exception */
|
|
518 tempc = a_f_THAL;
|
|
519 break;
|
|
520 case a_REH: /* exception */
|
|
521 tempc = a_f_REH;
|
|
522 break;
|
|
523 case a_ZAIN: /* exception */
|
|
524 tempc = a_f_ZAIN;
|
|
525 break;
|
|
526 case a_SEEN:
|
|
527 tempc = a_m_SEEN;
|
|
528 break;
|
|
529 case a_SHEEN:
|
|
530 tempc = a_m_SHEEN;
|
|
531 break;
|
|
532 case a_SAD:
|
|
533 tempc = a_m_SAD;
|
|
534 break;
|
|
535 case a_DAD:
|
|
536 tempc = a_m_DAD;
|
|
537 break;
|
|
538 case a_TAH:
|
|
539 tempc = a_m_TAH;
|
|
540 break;
|
|
541 case a_ZAH:
|
|
542 tempc = a_m_ZAH;
|
|
543 break;
|
|
544 case a_AIN:
|
|
545 tempc = a_m_AIN;
|
|
546 break;
|
|
547 case a_GHAIN:
|
|
548 tempc = a_m_GHAIN;
|
|
549 break;
|
|
550 case a_TATWEEL: /* exception */
|
|
551 tempc = cur_c;
|
|
552 break;
|
|
553 case a_FEH:
|
|
554 tempc = a_m_FEH;
|
|
555 break;
|
|
556 case a_QAF:
|
|
557 tempc = a_m_QAF;
|
|
558 break;
|
|
559 case a_KAF:
|
|
560 tempc = a_m_KAF;
|
|
561 break;
|
|
562 case a_LAM:
|
|
563 tempc = a_m_LAM;
|
|
564 break;
|
|
565 case a_MEEM:
|
|
566 tempc = a_m_MEEM;
|
|
567 break;
|
|
568 case a_NOON:
|
|
569 tempc = a_m_NOON;
|
|
570 break;
|
|
571 case a_HEH:
|
|
572 tempc = a_m_HEH;
|
|
573 break;
|
|
574 case a_WAW: /* exception */
|
|
575 tempc = a_f_WAW;
|
|
576 break;
|
|
577 case a_ALEF_MAKSURA: /* exception */
|
|
578 tempc = a_f_ALEF_MAKSURA;
|
|
579 break;
|
|
580 case a_YEH:
|
|
581 tempc = a_m_YEH;
|
|
582 break;
|
|
583 default:
|
|
584 tempc = 0;
|
|
585 }
|
|
586
|
|
587 return tempc;
|
|
588 }
|
|
589
|
|
590
|
|
591 /*
|
|
592 * Change shape - from ISO-8859-6/Isolated to final
|
|
593 */
|
|
594 static int
|
|
595 chg_c_a2f(cur_c)
|
|
596 int cur_c;
|
|
597 {
|
|
598 int tempc;
|
|
599
|
|
600 /* NOTE: these encodings need to be accounted for
|
|
601
|
|
602 a_f_ALEF_MADDA;
|
|
603 a_f_ALEF_HAMZA_ABOVE;
|
|
604 a_f_ALEF_HAMZA_BELOW;
|
|
605 a_f_LAM_ALEF_MADDA_ABOVE;
|
|
606 a_f_LAM_ALEF_HAMZA_ABOVE;
|
|
607 a_f_LAM_ALEF_HAMZA_BELOW;
|
|
608 */
|
|
609
|
|
610 switch (cur_c)
|
|
611 {
|
|
612 case a_HAMZA: /* exception */
|
|
613 tempc = a_s_HAMZA;
|
|
614 break;
|
|
615 case a_ALEF_MADDA:
|
|
616 tempc = a_f_ALEF_MADDA;
|
|
617 break;
|
|
618 case a_ALEF_HAMZA_ABOVE:
|
|
619 tempc = a_f_ALEF_HAMZA_ABOVE;
|
|
620 break;
|
|
621 case a_WAW_HAMZA:
|
|
622 tempc = a_f_WAW_HAMZA;
|
|
623 break;
|
|
624 case a_ALEF_HAMZA_BELOW:
|
|
625 tempc = a_f_ALEF_HAMZA_BELOW;
|
|
626 break;
|
|
627 case a_YEH_HAMZA:
|
|
628 tempc = a_f_YEH_HAMZA;
|
|
629 break;
|
|
630 case a_ALEF:
|
|
631 tempc = a_f_ALEF;
|
|
632 break;
|
|
633 case a_BEH:
|
|
634 tempc = a_f_BEH;
|
|
635 break;
|
|
636 case a_TEH_MARBUTA:
|
|
637 tempc = a_f_TEH_MARBUTA;
|
|
638 break;
|
|
639 case a_TEH:
|
|
640 tempc = a_f_TEH;
|
|
641 break;
|
|
642 case a_THEH:
|
|
643 tempc = a_f_THEH;
|
|
644 break;
|
|
645 case a_JEEM:
|
|
646 tempc = a_f_JEEM;
|
|
647 break;
|
|
648 case a_HAH:
|
|
649 tempc = a_f_HAH;
|
|
650 break;
|
|
651 case a_KHAH:
|
|
652 tempc = a_f_KHAH;
|
|
653 break;
|
|
654 case a_DAL:
|
|
655 tempc = a_f_DAL;
|
|
656 break;
|
|
657 case a_THAL:
|
|
658 tempc = a_f_THAL;
|
|
659 break;
|
|
660 case a_REH:
|
|
661 tempc = a_f_REH;
|
|
662 break;
|
|
663 case a_ZAIN:
|
|
664 tempc = a_f_ZAIN;
|
|
665 break;
|
|
666 case a_SEEN:
|
|
667 tempc = a_f_SEEN;
|
|
668 break;
|
|
669 case a_SHEEN:
|
|
670 tempc = a_f_SHEEN;
|
|
671 break;
|
|
672 case a_SAD:
|
|
673 tempc = a_f_SAD;
|
|
674 break;
|
|
675 case a_DAD:
|
|
676 tempc = a_f_DAD;
|
|
677 break;
|
|
678 case a_TAH:
|
|
679 tempc = a_f_TAH;
|
|
680 break;
|
|
681 case a_ZAH:
|
|
682 tempc = a_f_ZAH;
|
|
683 break;
|
|
684 case a_AIN:
|
|
685 tempc = a_f_AIN;
|
|
686 break;
|
|
687 case a_GHAIN:
|
|
688 tempc = a_f_GHAIN;
|
|
689 break;
|
|
690 case a_TATWEEL: /* exception */
|
|
691 tempc = cur_c;
|
|
692 break;
|
|
693 case a_FEH:
|
|
694 tempc = a_f_FEH;
|
|
695 break;
|
|
696 case a_QAF:
|
|
697 tempc = a_f_QAF;
|
|
698 break;
|
|
699 case a_KAF:
|
|
700 tempc = a_f_KAF;
|
|
701 break;
|
|
702 case a_LAM:
|
|
703 tempc = a_f_LAM;
|
|
704 break;
|
|
705 case a_MEEM:
|
|
706 tempc = a_f_MEEM;
|
|
707 break;
|
|
708 case a_NOON:
|
|
709 tempc = a_f_NOON;
|
|
710 break;
|
|
711 case a_HEH:
|
|
712 tempc = a_f_HEH;
|
|
713 break;
|
|
714 case a_WAW:
|
|
715 tempc = a_f_WAW;
|
|
716 break;
|
|
717 case a_ALEF_MAKSURA:
|
|
718 tempc = a_f_ALEF_MAKSURA;
|
|
719 break;
|
|
720 case a_YEH:
|
|
721 tempc = a_f_YEH;
|
|
722 break;
|
|
723 default:
|
|
724 tempc = 0;
|
|
725 }
|
|
726
|
|
727 return tempc;
|
|
728 }
|
|
729
|
|
730
|
|
731 /*
|
|
732 * Change shape - from Initial to Medial
|
|
733 */
|
|
734 static int
|
|
735 chg_c_i2m(cur_c)
|
|
736 int cur_c;
|
|
737 {
|
|
738 int tempc;
|
|
739
|
|
740 switch (cur_c)
|
|
741 {
|
|
742 case a_i_YEH_HAMZA:
|
|
743 tempc = a_m_YEH_HAMZA;
|
|
744 break;
|
|
745 case a_i_BEH:
|
|
746 tempc = a_m_BEH;
|
|
747 break;
|
|
748 case a_i_TEH:
|
|
749 tempc = a_m_TEH;
|
|
750 break;
|
|
751 case a_i_THEH:
|
|
752 tempc = a_m_THEH;
|
|
753 break;
|
|
754 case a_i_JEEM:
|
|
755 tempc = a_m_JEEM;
|
|
756 break;
|
|
757 case a_i_HAH:
|
|
758 tempc = a_m_HAH;
|
|
759 break;
|
|
760 case a_i_KHAH:
|
|
761 tempc = a_m_KHAH;
|
|
762 break;
|
|
763 case a_i_SEEN:
|
|
764 tempc = a_m_SEEN;
|
|
765 break;
|
|
766 case a_i_SHEEN:
|
|
767 tempc = a_m_SHEEN;
|
|
768 break;
|
|
769 case a_i_SAD:
|
|
770 tempc = a_m_SAD;
|
|
771 break;
|
|
772 case a_i_DAD:
|
|
773 tempc = a_m_DAD;
|
|
774 break;
|
|
775 case a_i_TAH:
|
|
776 tempc = a_m_TAH;
|
|
777 break;
|
|
778 case a_i_ZAH:
|
|
779 tempc = a_m_ZAH;
|
|
780 break;
|
|
781 case a_i_AIN:
|
|
782 tempc = a_m_AIN;
|
|
783 break;
|
|
784 case a_i_GHAIN:
|
|
785 tempc = a_m_GHAIN;
|
|
786 break;
|
|
787 case a_i_FEH:
|
|
788 tempc = a_m_FEH;
|
|
789 break;
|
|
790 case a_i_QAF:
|
|
791 tempc = a_m_QAF;
|
|
792 break;
|
|
793 case a_i_KAF:
|
|
794 tempc = a_m_KAF;
|
|
795 break;
|
|
796 case a_i_LAM:
|
|
797 tempc = a_m_LAM;
|
|
798 break;
|
|
799 case a_i_MEEM:
|
|
800 tempc = a_m_MEEM;
|
|
801 break;
|
|
802 case a_i_NOON:
|
|
803 tempc = a_m_NOON;
|
|
804 break;
|
|
805 case a_i_HEH:
|
|
806 tempc = a_m_HEH;
|
|
807 break;
|
|
808 case a_i_YEH:
|
|
809 tempc = a_m_YEH;
|
|
810 break;
|
|
811 default:
|
|
812 tempc = 0;
|
|
813 }
|
|
814
|
|
815 return tempc;
|
|
816 }
|
|
817
|
|
818
|
|
819 /*
|
|
820 * Change shape - from Final to Medial
|
|
821 */
|
|
822 static int
|
|
823 chg_c_f2m(cur_c)
|
|
824 int cur_c;
|
|
825 {
|
|
826 int tempc;
|
|
827
|
|
828 switch (cur_c)
|
|
829 {
|
|
830 /* NOTE: these encodings are multi-positional, no ?
|
|
831 case a_f_ALEF_MADDA:
|
|
832 case a_f_ALEF_HAMZA_ABOVE:
|
|
833 case a_f_ALEF_HAMZA_BELOW:
|
|
834 */
|
|
835 case a_f_YEH_HAMZA:
|
|
836 tempc = a_m_YEH_HAMZA;
|
|
837 break;
|
|
838 case a_f_WAW_HAMZA: /* exceptions */
|
|
839 case a_f_ALEF:
|
|
840 case a_f_TEH_MARBUTA:
|
|
841 case a_f_DAL:
|
|
842 case a_f_THAL:
|
|
843 case a_f_REH:
|
|
844 case a_f_ZAIN:
|
|
845 case a_f_WAW:
|
|
846 case a_f_ALEF_MAKSURA:
|
|
847 tempc = cur_c;
|
|
848 break;
|
|
849 case a_f_BEH:
|
|
850 tempc = a_m_BEH;
|
|
851 break;
|
|
852 case a_f_TEH:
|
|
853 tempc = a_m_TEH;
|
|
854 break;
|
|
855 case a_f_THEH:
|
|
856 tempc = a_m_THEH;
|
|
857 break;
|
|
858 case a_f_JEEM:
|
|
859 tempc = a_m_JEEM;
|
|
860 break;
|
|
861 case a_f_HAH:
|
|
862 tempc = a_m_HAH;
|
|
863 break;
|
|
864 case a_f_KHAH:
|
|
865 tempc = a_m_KHAH;
|
|
866 break;
|
|
867 case a_f_SEEN:
|
|
868 tempc = a_m_SEEN;
|
|
869 break;
|
|
870 case a_f_SHEEN:
|
|
871 tempc = a_m_SHEEN;
|
|
872 break;
|
|
873 case a_f_SAD:
|
|
874 tempc = a_m_SAD;
|
|
875 break;
|
|
876 case a_f_DAD:
|
|
877 tempc = a_m_DAD;
|
|
878 break;
|
|
879 case a_f_TAH:
|
|
880 tempc = a_m_TAH;
|
|
881 break;
|
|
882 case a_f_ZAH:
|
|
883 tempc = a_m_ZAH;
|
|
884 break;
|
|
885 case a_f_AIN:
|
|
886 tempc = a_m_AIN;
|
|
887 break;
|
|
888 case a_f_GHAIN:
|
|
889 tempc = a_m_GHAIN;
|
|
890 break;
|
|
891 case a_f_FEH:
|
|
892 tempc = a_m_FEH;
|
|
893 break;
|
|
894 case a_f_QAF:
|
|
895 tempc = a_m_QAF;
|
|
896 break;
|
|
897 case a_f_KAF:
|
|
898 tempc = a_m_KAF;
|
|
899 break;
|
|
900 case a_f_LAM:
|
|
901 tempc = a_m_LAM;
|
|
902 break;
|
|
903 case a_f_MEEM:
|
|
904 tempc = a_m_MEEM;
|
|
905 break;
|
|
906 case a_f_NOON:
|
|
907 tempc = a_m_NOON;
|
|
908 break;
|
|
909 case a_f_HEH:
|
|
910 tempc = a_m_HEH;
|
|
911 break;
|
|
912 case a_f_YEH:
|
|
913 tempc = a_m_YEH;
|
|
914 break;
|
|
915 /* NOTE: these encodings are multi-positional, no ?
|
|
916 case a_f_LAM_ALEF_MADDA_ABOVE:
|
|
917 case a_f_LAM_ALEF_HAMZA_ABOVE:
|
|
918 case a_f_LAM_ALEF_HAMZA_BELOW:
|
|
919 case a_f_LAM_ALEF:
|
|
920 */
|
|
921 default:
|
|
922 tempc = 0;
|
|
923 }
|
|
924
|
|
925 return tempc;
|
|
926 }
|
|
927
|
|
928
|
|
929 /*
|
|
930 * Change shape - from Combination (2 char) to an Isolated
|
|
931 */
|
|
932 static int
|
|
933 chg_c_laa2i(hid_c)
|
|
934 int hid_c;
|
|
935 {
|
|
936 int tempc;
|
|
937
|
|
938 switch (hid_c)
|
|
939 {
|
|
940 case a_ALEF_MADDA:
|
|
941 tempc = a_s_LAM_ALEF_MADDA_ABOVE;
|
|
942 break;
|
|
943 case a_ALEF_HAMZA_ABOVE:
|
|
944 tempc = a_s_LAM_ALEF_HAMZA_ABOVE;
|
|
945 break;
|
|
946 case a_ALEF_HAMZA_BELOW:
|
|
947 tempc = a_s_LAM_ALEF_HAMZA_BELOW;
|
|
948 break;
|
|
949 case a_ALEF:
|
|
950 tempc = a_s_LAM_ALEF;
|
|
951 break;
|
|
952 default:
|
|
953 tempc = 0;
|
|
954 }
|
|
955
|
|
956 return tempc;
|
|
957 }
|
|
958
|
|
959
|
|
960 /*
|
|
961 * Change shape - from Combination-Isolated to Final
|
|
962 */
|
|
963 static int
|
|
964 chg_c_laa2f(hid_c)
|
|
965 int hid_c;
|
|
966 {
|
|
967 int tempc;
|
|
968
|
|
969 switch (hid_c)
|
|
970 {
|
|
971 case a_ALEF_MADDA:
|
|
972 tempc = a_f_LAM_ALEF_MADDA_ABOVE;
|
|
973 break;
|
|
974 case a_ALEF_HAMZA_ABOVE:
|
|
975 tempc = a_f_LAM_ALEF_HAMZA_ABOVE;
|
|
976 break;
|
|
977 case a_ALEF_HAMZA_BELOW:
|
|
978 tempc = a_f_LAM_ALEF_HAMZA_BELOW;
|
|
979 break;
|
|
980 case a_ALEF:
|
|
981 tempc = a_f_LAM_ALEF;
|
|
982 break;
|
|
983 default:
|
|
984 tempc = 0;
|
|
985 }
|
|
986
|
|
987 return tempc;
|
|
988 }
|
|
989
|
|
/*
 * half_shape: do "half-shaping" on character "c".
 * An ISO-8859-6 letter (A_is_a()) is converted with chg_c_a2i(); a valid
 * character in Final shape is converted with chg_c_f2m().
 * Returns the converted character, or zero when no shaping applies.
 */
    static int
half_shape(c)
    int		c;
{
    int		shaped = 0;

    if (A_is_a(c))
	shaped = chg_c_a2i(c);
    else if (A_is_valid(c) && A_is_f(c))
	shaped = chg_c_f2m(c);

    return shaped;
}
|
|
1003
|
|
1004 /*
|
|
1005 * Do Arabic shaping on character "c". Returns the shaped character.
|
|
1006 * out: "ccp" points to the first byte of the character to be shaped.
|
|
1007 * in/out: "c1p" points to the first composing char for "c".
|
|
1008 * in: "prev_c" is the previous character (not shaped)
|
|
1009 * in: "prev_c1" is the first composing char for the previous char
|
|
1010 * (not shaped)
|
|
1011 * in: "next_c" is the next character (not shaped).
|
|
1012 */
|
|
1013 int
|
|
1014 arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
|
|
1015 int c;
|
|
1016 int *ccp;
|
|
1017 int *c1p;
|
|
1018 int prev_c;
|
|
1019 int prev_c1;
|
|
1020 int next_c;
|
|
1021 {
|
|
1022 int curr_c;
|
|
1023 int shape_c;
|
|
1024 int curr_laa;
|
|
1025 int prev_laa;
|
|
1026
|
|
1027 /* Deal only with Arabic character, pass back all others */
|
|
1028 if (!A_is_ok(c))
|
|
1029 return c;
|
|
1030
|
|
1031 /* half-shape current and previous character */
|
|
1032 shape_c = half_shape(prev_c);
|
|
1033
|
|
1034 /* Save away current character */
|
|
1035 curr_c = c;
|
|
1036
|
|
1037 curr_laa = A_firstc_laa(c, *c1p);
|
|
1038 prev_laa = A_firstc_laa(prev_c, prev_c1);
|
|
1039
|
|
1040 if (curr_laa)
|
|
1041 {
|
|
1042 if (A_is_valid(prev_c) && !A_is_f(shape_c)
|
|
1043 && !A_is_s(shape_c) && !prev_laa)
|
|
1044 curr_c = chg_c_laa2f(curr_laa);
|
|
1045 else
|
|
1046 curr_c = chg_c_laa2i(curr_laa);
|
|
1047
|
|
1048 /* Remove the composing character */
|
|
1049 *c1p = 0;
|
|
1050 }
|
|
1051 else if (!A_is_valid(prev_c) && A_is_valid(next_c))
|
|
1052 curr_c = chg_c_a2i(c);
|
|
1053 else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa)
|
|
1054 curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c);
|
|
1055 else if (A_is_valid(next_c))
|
|
1056 curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c);
|
|
1057 else if (A_is_valid(prev_c))
|
|
1058 curr_c = chg_c_a2f(c);
|
|
1059 else
|
|
1060 curr_c = chg_c_a2s(c);
|
|
1061
|
|
1062 /* Sanity check -- curr_c should, in the future, never be 0.
|
|
1063 * We should, in the future, insert a fatal error here. */
|
|
1064 if (curr_c == NUL)
|
|
1065 curr_c = c;
|
|
1066
|
|
1067 if (curr_c != c && ccp != NULL)
|
|
1068 {
|
|
1069 char_u buf[MB_MAXBYTES];
|
|
1070
|
|
1071 /* Update the first byte of the character. */
|
|
1072 (*mb_char2bytes)(curr_c, buf);
|
|
1073 *ccp = buf[0];
|
|
1074 }
|
|
1075
|
|
1076 /* Return the shaped character */
|
|
1077 return curr_c;
|
|
1078 }
|
|
1079
|
|
1080
|
|
1081 /*
|
|
1082 * A_firstc_laa returns first character of LAA combination if it exists
|
|
1083 */
|
|
1084 static int
|
|
1085 A_firstc_laa(c, c1)
|
|
1086 int c; /* base character */
|
|
1087 int c1; /* first composing character */
|
|
1088 {
|
|
1089 if (c1 != NUL && c == a_LAM && !A_is_harakat(c1))
|
|
1090 return c1;
|
|
1091 return 0;
|
|
1092 }
|
|
1093
|
|
1094
|
|
1095 /*
|
|
1096 * A_is_harakat returns TRUE if 'c' is an Arabic Harakat character
|
|
1097 * (harakat/tanween)
|
|
1098 */
|
|
1099 static int
|
|
1100 A_is_harakat(c)
|
|
1101 int c;
|
|
1102 {
|
|
1103 return (c >= a_FATHATAN && c <= a_SUKUN);
|
|
1104 }
|
|
1105
|
|
1106
|
|
1107 /*
|
|
1108 * A_is_iso returns TRUE if 'c' is an Arabic ISO-8859-6 character
|
|
1109 * (alphabet/number/punctuation)
|
|
1110 */
|
|
1111 static int
|
|
1112 A_is_iso(c)
|
|
1113 int c;
|
|
1114 {
|
|
1115 return ((c >= a_HAMZA && c <= a_GHAIN)
|
|
1116 || (c >= a_TATWEEL && c <= a_HAMZA_BELOW)
|
|
1117 || c == a_MINI_ALEF);
|
|
1118 }
|
|
1119
|
|
1120
|
|
1121 /*
|
|
1122 * A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character
|
|
1123 * (alphabet/number/punctuation)
|
|
1124 */
|
|
1125 static int
|
|
1126 A_is_formb(c)
|
|
1127 int c;
|
|
1128 {
|
|
1129 return ((c >= a_s_FATHATAN && c <= a_s_DAMMATAN)
|
|
1130 || c == a_s_KASRATAN
|
|
1131 || (c >= a_s_FATHA && c <= a_f_LAM_ALEF)
|
|
1132 || c == a_BYTE_ORDER_MARK);
|
|
1133 }
|
|
1134
|
|
1135
|
|
/*
 * A_is_ok: test for an Arabic 10646 character (8859-6 or Form-B).
 * Returns non-zero when either A_is_iso() or A_is_formb() accepts "c".
 */
    static int
A_is_ok(c)
    int		c;
{
    /* Rejected only when both tests fail; A_is_formb() is not consulted
     * once A_is_iso() has accepted the character. */
    return !(!A_is_iso(c) && !A_is_formb(c));
}
|
|
1145
|
|
1146
|
|
/*
 * A_is_valid: test for an Arabic 10646 character (8859-6 or Form-B),
 * with some exceptions/exclusions: characters for which A_is_special()
 * is true are rejected.
 * Returns non-zero for an accepted character, zero otherwise.
 */
    static int
A_is_valid(c)
    int		c;
{
    /* Not Arabic at all: nothing more to check. */
    if (!A_is_ok(c))
	return 0;

    return !A_is_special(c);
}
|
|
1157
|
|
1158
|
|
1159 /*
|
|
1160 * A_is_special returns TRUE if 'c' is not a special Arabic character.
|
|
1161 * Specials don't adhere to most of the rules.
|
|
1162 */
|
|
1163 static int
|
|
1164 A_is_special(c)
|
|
1165 int c;
|
|
1166 {
|
|
1167 return (c == a_HAMZA || c == a_s_HAMZA);
|
|
1168 }
|