diff src/arabic.c @ 7:3fc0f57ecb91 v7.0001

updated for version 7.0001
author vimboss
date Sun, 13 Jun 2004 20:20:40 +0000
parents
children f52d2ea0f81c
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/src/arabic.c
@@ -0,0 +1,1168 @@
+/* vi:set ts=8 sts=4 sw=4:
+ *
+ * VIM - Vi IMproved    by Bram Moolenaar
+ *
+ * Do ":help uganda"  in Vim to read copying and usage conditions.
+ * Do ":help credits" in Vim to see a list of people who contributed.
+ * See README.txt for an overview of the Vim source code.
+ */
+
+/*
+ * arabic.c: functions for Arabic language
+ *
+ * Included by main.c, when FEAT_ARABIC & FEAT_GUI is defined.
+ *
+ * --
+ *
+ * Author: Nadim Shaikli & Isam Bayazidi
+ *
+ */
+
+/*
+ * Prototypes for the file-local helpers.  Parameter names match the ones
+ * used by the corresponding definitions in this file.
+ */
+static int  A_is_a __ARGS((int cur_c));
+static int  A_is_s __ARGS((int cur_c));
+static int  A_is_f __ARGS((int cur_c));
+static int  chg_c_a2s __ARGS((int cur_c));
+static int  chg_c_a2i __ARGS((int cur_c));
+static int  chg_c_a2m __ARGS((int cur_c));
+static int  chg_c_a2f __ARGS((int cur_c));
+static int  chg_c_i2m __ARGS((int cur_c));
+static int  chg_c_f2m __ARGS((int cur_c));
+static int  chg_c_laa2i __ARGS((int hid_c));
+static int  chg_c_laa2f __ARGS((int hid_c));
+static int  half_shape __ARGS((int c));
+/* "c" is the base character, "c1" its first composing character. */
+static int  A_firstc_laa __ARGS((int c, int c1));
+static int  A_is_harakat __ARGS((int c));
+static int  A_is_iso __ARGS((int c));
+static int  A_is_formb __ARGS((int c));
+static int  A_is_ok __ARGS((int c));
+static int  A_is_valid __ARGS((int c));
+static int  A_is_special __ARGS((int c));
+
+
+/*
+ * A_is_a: tell whether "cur_c" is an ISO-8859-6 shaped Arabic letter, i.e.
+ * the form in which the user enters it.  The accepted set is exactly the
+ * a_* constants listed in the switch below (Tatweel included).
+ * Returns TRUE for a member of that set, FALSE otherwise.
+ */
+    static int
+A_is_a(cur_c)
+    int cur_c;
+{
+    int	    is_letter = FALSE;
+
+    switch (cur_c)
+    {
+	case a_HAMZA: case a_ALEF_MADDA: case a_ALEF_HAMZA_ABOVE:
+	case a_WAW_HAMZA: case a_ALEF_HAMZA_BELOW: case a_YEH_HAMZA:
+	case a_ALEF: case a_BEH: case a_TEH_MARBUTA: case a_TEH:
+	case a_THEH: case a_JEEM: case a_HAH: case a_KHAH:
+	case a_DAL: case a_THAL: case a_REH: case a_ZAIN:
+	case a_SEEN: case a_SHEEN: case a_SAD: case a_DAD:
+	case a_TAH: case a_ZAH: case a_AIN: case a_GHAIN:
+	case a_TATWEEL:
+	case a_FEH: case a_QAF: case a_KAF: case a_LAM:
+	case a_MEEM: case a_NOON: case a_HEH: case a_WAW:
+	case a_ALEF_MAKSURA: case a_YEH:
+	    is_letter = TRUE;
+	    break;
+
+	default:
+	    break;
+    }
+
+    return is_letter;
+}
+
+
+/*
+ * A_is_s: tell whether "cur_c" is an Isolated Form-B Arabic letter, i.e.
+ * one of the a_s_* constants listed in the switch below.
+ * Returns TRUE for a member of that set, FALSE otherwise.
+ */
+    static int
+A_is_s(cur_c)
+    int cur_c;
+{
+    int	    is_isolated = FALSE;
+
+    switch (cur_c)
+    {
+	case a_s_HAMZA: case a_s_ALEF_MADDA: case a_s_ALEF_HAMZA_ABOVE:
+	case a_s_WAW_HAMZA: case a_s_ALEF_HAMZA_BELOW: case a_s_YEH_HAMZA:
+	case a_s_ALEF: case a_s_BEH: case a_s_TEH_MARBUTA: case a_s_TEH:
+	case a_s_THEH: case a_s_JEEM: case a_s_HAH: case a_s_KHAH:
+	case a_s_DAL: case a_s_THAL: case a_s_REH: case a_s_ZAIN:
+	case a_s_SEEN: case a_s_SHEEN: case a_s_SAD: case a_s_DAD:
+	case a_s_TAH: case a_s_ZAH: case a_s_AIN: case a_s_GHAIN:
+	case a_s_FEH: case a_s_QAF: case a_s_KAF: case a_s_LAM:
+	case a_s_MEEM: case a_s_NOON: case a_s_HEH: case a_s_WAW:
+	case a_s_ALEF_MAKSURA: case a_s_YEH:
+	    is_isolated = TRUE;
+	    break;
+
+	default:
+	    break;
+    }
+
+    return is_isolated;
+}
+
+
+/*
+ * A_is_f: tell whether "cur_c" is the Final shape of an Arabic letter, i.e.
+ * one of the a_f_* constants listed in the switch below.  The final
+ * LAM-ALEF combinations are part of the set.
+ * Returns TRUE for a member of that set, FALSE otherwise.
+ */
+    static int
+A_is_f(cur_c)
+    int cur_c;
+{
+    int	    is_final = FALSE;
+
+    switch (cur_c)
+    {
+	case a_f_ALEF_MADDA: case a_f_ALEF_HAMZA_ABOVE:
+	case a_f_WAW_HAMZA: case a_f_ALEF_HAMZA_BELOW: case a_f_YEH_HAMZA:
+	case a_f_ALEF: case a_f_BEH: case a_f_TEH_MARBUTA: case a_f_TEH:
+	case a_f_THEH: case a_f_JEEM: case a_f_HAH: case a_f_KHAH:
+	case a_f_DAL: case a_f_THAL: case a_f_REH: case a_f_ZAIN:
+	case a_f_SEEN: case a_f_SHEEN: case a_f_SAD: case a_f_DAD:
+	case a_f_TAH: case a_f_ZAH: case a_f_AIN: case a_f_GHAIN:
+	case a_f_FEH: case a_f_QAF: case a_f_KAF: case a_f_LAM:
+	case a_f_MEEM: case a_f_NOON: case a_f_HEH: case a_f_WAW:
+	case a_f_ALEF_MAKSURA: case a_f_YEH:
+	case a_f_LAM_ALEF_MADDA_ABOVE: case a_f_LAM_ALEF_HAMZA_ABOVE:
+	case a_f_LAM_ALEF_HAMZA_BELOW: case a_f_LAM_ALEF:
+	    is_final = TRUE;
+	    break;
+
+	default:
+	    break;
+    }
+
+    return is_final;
+}
+
+
+/*
+ * chg_c_a2s: change shape from ISO-8859-6/Isolated to Form-B Isolated.
+ * Every a_* letter maps to its a_s_* counterpart; Tatweel is returned
+ * unchanged.  Returns 0 when "cur_c" is not one of the handled letters.
+ */
+    static int
+chg_c_a2s(cur_c)
+    int cur_c;
+{
+    switch (cur_c)
+    {
+	/* exception: Tatweel keeps its own value */
+	case a_TATWEEL: return cur_c;
+
+	case a_HAMZA: return a_s_HAMZA;
+	case a_ALEF_MADDA: return a_s_ALEF_MADDA;
+	case a_ALEF_HAMZA_ABOVE: return a_s_ALEF_HAMZA_ABOVE;
+	case a_WAW_HAMZA: return a_s_WAW_HAMZA;
+	case a_ALEF_HAMZA_BELOW: return a_s_ALEF_HAMZA_BELOW;
+	case a_YEH_HAMZA: return a_s_YEH_HAMZA;
+	case a_ALEF: return a_s_ALEF;
+	case a_BEH: return a_s_BEH;
+	case a_TEH_MARBUTA: return a_s_TEH_MARBUTA;
+	case a_TEH: return a_s_TEH;
+	case a_THEH: return a_s_THEH;
+	case a_JEEM: return a_s_JEEM;
+	case a_HAH: return a_s_HAH;
+	case a_KHAH: return a_s_KHAH;
+	case a_DAL: return a_s_DAL;
+	case a_THAL: return a_s_THAL;
+	case a_REH: return a_s_REH;
+	case a_ZAIN: return a_s_ZAIN;
+	case a_SEEN: return a_s_SEEN;
+	case a_SHEEN: return a_s_SHEEN;
+	case a_SAD: return a_s_SAD;
+	case a_DAD: return a_s_DAD;
+	case a_TAH: return a_s_TAH;
+	case a_ZAH: return a_s_ZAH;
+	case a_AIN: return a_s_AIN;
+	case a_GHAIN: return a_s_GHAIN;
+	case a_FEH: return a_s_FEH;
+	case a_QAF: return a_s_QAF;
+	case a_KAF: return a_s_KAF;
+	case a_LAM: return a_s_LAM;
+	case a_MEEM: return a_s_MEEM;
+	case a_NOON: return a_s_NOON;
+	case a_HEH: return a_s_HEH;
+	case a_WAW: return a_s_WAW;
+	case a_ALEF_MAKSURA: return a_s_ALEF_MAKSURA;
+	case a_YEH: return a_s_YEH;
+
+	default:
+	    break;
+    }
+
+    /* not a letter this table knows about */
+    return 0;
+}
+
+
+/*
+ * chg_c_a2i: change shape from ISO-8859-6/Isolated to Initial.
+ * Letters in the "exceptions" group are mapped to their isolated (a_s_*)
+ * form instead of an initial one, and Tatweel is returned unchanged.
+ * Returns 0 when "cur_c" is not one of the handled letters.
+ */
+    static int
+chg_c_a2i(cur_c)
+    int cur_c;
+{
+    switch (cur_c)
+    {
+	/* exceptions: the result is the isolated form */
+	case a_HAMZA: return a_s_HAMZA;
+	case a_ALEF_MADDA: return a_s_ALEF_MADDA;
+	case a_ALEF_HAMZA_ABOVE: return a_s_ALEF_HAMZA_ABOVE;
+	case a_WAW_HAMZA: return a_s_WAW_HAMZA;
+	case a_ALEF_HAMZA_BELOW: return a_s_ALEF_HAMZA_BELOW;
+	case a_ALEF: return a_s_ALEF;
+	case a_TEH_MARBUTA: return a_s_TEH_MARBUTA;
+	case a_DAL: return a_s_DAL;
+	case a_THAL: return a_s_THAL;
+	case a_REH: return a_s_REH;
+	case a_ZAIN: return a_s_ZAIN;
+	case a_WAW: return a_s_WAW;
+	case a_ALEF_MAKSURA: return a_s_ALEF_MAKSURA;
+
+	/* exception: Tatweel keeps its own value */
+	case a_TATWEEL: return cur_c;
+
+	/* regular letters: the result is the initial form */
+	case a_YEH_HAMZA: return a_i_YEH_HAMZA;
+	case a_BEH: return a_i_BEH;
+	case a_TEH: return a_i_TEH;
+	case a_THEH: return a_i_THEH;
+	case a_JEEM: return a_i_JEEM;
+	case a_HAH: return a_i_HAH;
+	case a_KHAH: return a_i_KHAH;
+	case a_SEEN: return a_i_SEEN;
+	case a_SHEEN: return a_i_SHEEN;
+	case a_SAD: return a_i_SAD;
+	case a_DAD: return a_i_DAD;
+	case a_TAH: return a_i_TAH;
+	case a_ZAH: return a_i_ZAH;
+	case a_AIN: return a_i_AIN;
+	case a_GHAIN: return a_i_GHAIN;
+	case a_FEH: return a_i_FEH;
+	case a_QAF: return a_i_QAF;
+	case a_KAF: return a_i_KAF;
+	case a_LAM: return a_i_LAM;
+	case a_MEEM: return a_i_MEEM;
+	case a_NOON: return a_i_NOON;
+	case a_HEH: return a_i_HEH;
+	case a_YEH: return a_i_YEH;
+
+	default:
+	    break;
+    }
+
+    /* not a letter this table knows about */
+    return 0;
+}
+
+
+/*
+ * chg_c_a2m: change shape from ISO-8859-6/Isolated to Medial.
+ * Letters in the "exceptions" group are mapped to their final (a_f_*) form
+ * instead of a medial one; Hamza maps to its isolated form and Tatweel is
+ * returned unchanged.
+ * Returns 0 when "cur_c" is not one of the handled letters.
+ */
+    static int
+chg_c_a2m(cur_c)
+    int cur_c;
+{
+    switch (cur_c)
+    {
+	/* exception: Hamza becomes its isolated form */
+	case a_HAMZA: return a_s_HAMZA;
+
+	/* exception: Tatweel keeps its own value */
+	case a_TATWEEL: return cur_c;
+
+	/* exceptions: the result is the final form */
+	case a_ALEF_MADDA: return a_f_ALEF_MADDA;
+	case a_ALEF_HAMZA_ABOVE: return a_f_ALEF_HAMZA_ABOVE;
+	case a_WAW_HAMZA: return a_f_WAW_HAMZA;
+	case a_ALEF_HAMZA_BELOW: return a_f_ALEF_HAMZA_BELOW;
+	case a_ALEF: return a_f_ALEF;
+	case a_TEH_MARBUTA: return a_f_TEH_MARBUTA;
+	case a_DAL: return a_f_DAL;
+	case a_THAL: return a_f_THAL;
+	case a_REH: return a_f_REH;
+	case a_ZAIN: return a_f_ZAIN;
+	case a_WAW: return a_f_WAW;
+	case a_ALEF_MAKSURA: return a_f_ALEF_MAKSURA;
+
+	/* regular letters: the result is the medial form */
+	case a_YEH_HAMZA: return a_m_YEH_HAMZA;
+	case a_BEH: return a_m_BEH;
+	case a_TEH: return a_m_TEH;
+	case a_THEH: return a_m_THEH;
+	case a_JEEM: return a_m_JEEM;
+	case a_HAH: return a_m_HAH;
+	case a_KHAH: return a_m_KHAH;
+	case a_SEEN: return a_m_SEEN;
+	case a_SHEEN: return a_m_SHEEN;
+	case a_SAD: return a_m_SAD;
+	case a_DAD: return a_m_DAD;
+	case a_TAH: return a_m_TAH;
+	case a_ZAH: return a_m_ZAH;
+	case a_AIN: return a_m_AIN;
+	case a_GHAIN: return a_m_GHAIN;
+	case a_FEH: return a_m_FEH;
+	case a_QAF: return a_m_QAF;
+	case a_KAF: return a_m_KAF;
+	case a_LAM: return a_m_LAM;
+	case a_MEEM: return a_m_MEEM;
+	case a_NOON: return a_m_NOON;
+	case a_HEH: return a_m_HEH;
+	case a_YEH: return a_m_YEH;
+
+	default:
+	    break;
+    }
+
+    /* not a letter this table knows about */
+    return 0;
+}
+
+
+/*
+ * chg_c_a2f: change shape from ISO-8859-6/Isolated to Final.
+ * Hamza maps to its isolated form and Tatweel is returned unchanged; every
+ * other handled letter maps to its a_f_* counterpart.
+ * Returns 0 when "cur_c" is not one of the handled letters.
+ *
+ * NOTE (kept from the original author): these encodings still need to be
+ * accounted for: a_f_ALEF_MADDA, a_f_ALEF_HAMZA_ABOVE, a_f_ALEF_HAMZA_BELOW,
+ * a_f_LAM_ALEF_MADDA_ABOVE, a_f_LAM_ALEF_HAMZA_ABOVE and
+ * a_f_LAM_ALEF_HAMZA_BELOW.  The final LAM-ALEF forms are never returned by
+ * this function.
+ */
+    static int
+chg_c_a2f(cur_c)
+    int cur_c;
+{
+    switch (cur_c)
+    {
+	/* exception: Hamza becomes its isolated form */
+	case a_HAMZA: return a_s_HAMZA;
+
+	/* exception: Tatweel keeps its own value */
+	case a_TATWEEL: return cur_c;
+
+	case a_ALEF_MADDA: return a_f_ALEF_MADDA;
+	case a_ALEF_HAMZA_ABOVE: return a_f_ALEF_HAMZA_ABOVE;
+	case a_WAW_HAMZA: return a_f_WAW_HAMZA;
+	case a_ALEF_HAMZA_BELOW: return a_f_ALEF_HAMZA_BELOW;
+	case a_YEH_HAMZA: return a_f_YEH_HAMZA;
+	case a_ALEF: return a_f_ALEF;
+	case a_BEH: return a_f_BEH;
+	case a_TEH_MARBUTA: return a_f_TEH_MARBUTA;
+	case a_TEH: return a_f_TEH;
+	case a_THEH: return a_f_THEH;
+	case a_JEEM: return a_f_JEEM;
+	case a_HAH: return a_f_HAH;
+	case a_KHAH: return a_f_KHAH;
+	case a_DAL: return a_f_DAL;
+	case a_THAL: return a_f_THAL;
+	case a_REH: return a_f_REH;
+	case a_ZAIN: return a_f_ZAIN;
+	case a_SEEN: return a_f_SEEN;
+	case a_SHEEN: return a_f_SHEEN;
+	case a_SAD: return a_f_SAD;
+	case a_DAD: return a_f_DAD;
+	case a_TAH: return a_f_TAH;
+	case a_ZAH: return a_f_ZAH;
+	case a_AIN: return a_f_AIN;
+	case a_GHAIN: return a_f_GHAIN;
+	case a_FEH: return a_f_FEH;
+	case a_QAF: return a_f_QAF;
+	case a_KAF: return a_f_KAF;
+	case a_LAM: return a_f_LAM;
+	case a_MEEM: return a_f_MEEM;
+	case a_NOON: return a_f_NOON;
+	case a_HEH: return a_f_HEH;
+	case a_WAW: return a_f_WAW;
+	case a_ALEF_MAKSURA: return a_f_ALEF_MAKSURA;
+	case a_YEH: return a_f_YEH;
+
+	default:
+	    break;
+    }
+
+    /* not a letter this table knows about */
+    return 0;
+}
+
+
+/*
+ * chg_c_i2m: change shape from Initial to Medial.
+ * Each a_i_* letter maps to the a_m_* letter of the same name.
+ * Returns 0 when "cur_c" is not one of the handled initial forms.
+ */
+    static int
+chg_c_i2m(cur_c)
+    int cur_c;
+{
+    switch (cur_c)
+    {
+	case a_i_YEH_HAMZA: return a_m_YEH_HAMZA;
+	case a_i_BEH: return a_m_BEH;
+	case a_i_TEH: return a_m_TEH;
+	case a_i_THEH: return a_m_THEH;
+	case a_i_JEEM: return a_m_JEEM;
+	case a_i_HAH: return a_m_HAH;
+	case a_i_KHAH: return a_m_KHAH;
+	case a_i_SEEN: return a_m_SEEN;
+	case a_i_SHEEN: return a_m_SHEEN;
+	case a_i_SAD: return a_m_SAD;
+	case a_i_DAD: return a_m_DAD;
+	case a_i_TAH: return a_m_TAH;
+	case a_i_ZAH: return a_m_ZAH;
+	case a_i_AIN: return a_m_AIN;
+	case a_i_GHAIN: return a_m_GHAIN;
+	case a_i_FEH: return a_m_FEH;
+	case a_i_QAF: return a_m_QAF;
+	case a_i_KAF: return a_m_KAF;
+	case a_i_LAM: return a_m_LAM;
+	case a_i_MEEM: return a_m_MEEM;
+	case a_i_NOON: return a_m_NOON;
+	case a_i_HEH: return a_m_HEH;
+	case a_i_YEH: return a_m_YEH;
+
+	default:
+	    break;
+    }
+
+    /* not an initial form this table knows about */
+    return 0;
+}
+
+
+/*
+ * chg_c_f2m: change shape from Final to Medial.
+ * The final forms in the "exceptions" group are returned unchanged; the
+ * other handled a_f_* letters map to the a_m_* letter of the same name.
+ * Returns 0 when "cur_c" is not one of the handled final forms.
+ *
+ * NOTE (open question kept from the original author): the following
+ * encodings are possibly multi-positional and are deliberately not handled
+ * here, so they yield 0: a_f_ALEF_MADDA, a_f_ALEF_HAMZA_ABOVE,
+ * a_f_ALEF_HAMZA_BELOW, a_f_LAM_ALEF_MADDA_ABOVE, a_f_LAM_ALEF_HAMZA_ABOVE,
+ * a_f_LAM_ALEF_HAMZA_BELOW and a_f_LAM_ALEF.
+ */
+    static int
+chg_c_f2m(cur_c)
+    int cur_c;
+{
+    switch (cur_c)
+    {
+	/* exceptions: these final forms are passed through as they are */
+	case a_f_WAW_HAMZA: case a_f_ALEF: case a_f_TEH_MARBUTA:
+	case a_f_DAL: case a_f_THAL: case a_f_REH: case a_f_ZAIN:
+	case a_f_WAW: case a_f_ALEF_MAKSURA:
+	    return cur_c;
+
+	/* regular letters: the result is the medial form */
+	case a_f_YEH_HAMZA: return a_m_YEH_HAMZA;
+	case a_f_BEH: return a_m_BEH;
+	case a_f_TEH: return a_m_TEH;
+	case a_f_THEH: return a_m_THEH;
+	case a_f_JEEM: return a_m_JEEM;
+	case a_f_HAH: return a_m_HAH;
+	case a_f_KHAH: return a_m_KHAH;
+	case a_f_SEEN: return a_m_SEEN;
+	case a_f_SHEEN: return a_m_SHEEN;
+	case a_f_SAD: return a_m_SAD;
+	case a_f_DAD: return a_m_DAD;
+	case a_f_TAH: return a_m_TAH;
+	case a_f_ZAH: return a_m_ZAH;
+	case a_f_AIN: return a_m_AIN;
+	case a_f_GHAIN: return a_m_GHAIN;
+	case a_f_FEH: return a_m_FEH;
+	case a_f_QAF: return a_m_QAF;
+	case a_f_KAF: return a_m_KAF;
+	case a_f_LAM: return a_m_LAM;
+	case a_f_MEEM: return a_m_MEEM;
+	case a_f_NOON: return a_m_NOON;
+	case a_f_HEH: return a_m_HEH;
+	case a_f_YEH: return a_m_YEH;
+
+	default:
+	    break;
+    }
+
+    /* not a final form this table knows about */
+    return 0;
+}
+
+
+/*
+ * chg_c_laa2i: change shape from a Combination (2 char) to an Isolated one.
+ * "hid_c" is the ALEF variant of the combination; the result is the
+ * matching a_s_LAM_ALEF* character.
+ * Returns 0 when "hid_c" is not one of the four handled ALEF variants.
+ */
+    static int
+chg_c_laa2i(hid_c)
+    int hid_c;
+{
+    switch (hid_c)
+    {
+	case a_ALEF_MADDA: return a_s_LAM_ALEF_MADDA_ABOVE;
+	case a_ALEF_HAMZA_ABOVE: return a_s_LAM_ALEF_HAMZA_ABOVE;
+	case a_ALEF_HAMZA_BELOW: return a_s_LAM_ALEF_HAMZA_BELOW;
+	case a_ALEF: return a_s_LAM_ALEF;
+
+	default:
+	    break;
+    }
+
+    /* no combination exists for this character */
+    return 0;
+}
+
+
+/*
+ * chg_c_laa2f: change shape from a Combination (2 char) to a Final one.
+ * "hid_c" is the ALEF variant of the combination; the result is the
+ * matching a_f_LAM_ALEF* character.
+ * Returns 0 when "hid_c" is not one of the four handled ALEF variants.
+ */
+    static int
+chg_c_laa2f(hid_c)
+    int hid_c;
+{
+    switch (hid_c)
+    {
+	case a_ALEF_MADDA: return a_f_LAM_ALEF_MADDA_ABOVE;
+	case a_ALEF_HAMZA_ABOVE: return a_f_LAM_ALEF_HAMZA_ABOVE;
+	case a_ALEF_HAMZA_BELOW: return a_f_LAM_ALEF_HAMZA_BELOW;
+	case a_ALEF: return a_f_LAM_ALEF;
+
+	default:
+	    break;
+    }
+
+    /* no combination exists for this character */
+    return 0;
+}
+
+/*
+ * half_shape: do "half-shaping" on character "c".
+ * An ISO-8859-6 letter (see A_is_a()) is converted with chg_c_a2i(); a
+ * valid character that is a final form is converted with chg_c_f2m().
+ * Returns zero when neither applies, i.e. no shaping was done.
+ */
+    static int
+half_shape(c)
+    int		c;
+{
+    int		shaped = 0;
+
+    if (A_is_a(c))
+	shaped = chg_c_a2i(c);
+    else if (A_is_valid(c) && A_is_f(c))
+	shaped = chg_c_f2m(c);
+
+    return shaped;
+}
+
+/*
+ * Do Arabic shaping on character "c".  Returns the shaped character, or "c"
+ * itself when "c" is not an Arabic character or no shape could be found.
+ * in:     "c"       is the character to be shaped.
+ * out:    "ccp"     may be NULL.  When it is not NULL and the shaped
+ *		     character differs from "c", "*ccp" is set to the first
+ *		     byte that mb_char2bytes() produces for the shaped
+ *		     character.
+ * in/out: "c1p"     points to the first composing char for "c"; must not be
+ *		     NULL.  "*c1p" is set to zero when the composing char was
+ *		     merged with "c" into a LAM-ALEF combination.
+ * in:     "prev_c"  is the previous character (not shaped)
+ * in:     "prev_c1" is the first composing char for the previous char
+ *		     (not shaped)
+ * in:     "next_c"  is the next character (not shaped).
+ */
+    int
+arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c)
+    int		c;
+    int		*ccp;
+    int		*c1p;
+    int		prev_c;
+    int		prev_c1;
+    int		next_c;
+{
+    int		curr_c;
+    int		shape_c;
+    int		curr_laa;
+    int		prev_laa;
+
+    /* Deal only with Arabic character, pass back all others */
+    if (!A_is_ok(c))
+	return c;
+
+    /* Half-shape the previous character; the result (zero when it could not
+     * be shaped) is used below to decide how "c" is positioned. */
+    shape_c = half_shape(prev_c);
+
+    /* Non-zero when the character starts a LAM + composing char pair. */
+    curr_laa = A_firstc_laa(c, *c1p);
+    prev_laa = A_firstc_laa(prev_c, prev_c1);
+
+    if (curr_laa)
+    {
+	if (A_is_valid(prev_c) && !A_is_f(shape_c)
+					 && !A_is_s(shape_c) && !prev_laa)
+	    curr_c = chg_c_laa2f(curr_laa);
+	else
+	    curr_c = chg_c_laa2i(curr_laa);
+
+	/* Remove the composing character, but only when it really was folded
+	 * into a combination.  A_firstc_laa() accepts every non-harakat
+	 * composing char, while chg_c_laa2f()/chg_c_laa2i() return zero for
+	 * anything that is not one of the four ALEF variants.  Clearing
+	 * "*c1p" in that case would silently drop the composing character
+	 * while "c" itself is returned unshaped. */
+	if (curr_c != NUL)
+	    *c1p = 0;
+    }
+    else if (!A_is_valid(prev_c) && A_is_valid(next_c))
+	curr_c = chg_c_a2i(c);
+    else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa)
+	curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c);
+    else if (A_is_valid(next_c))
+	curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c);
+    else if (A_is_valid(prev_c))
+	curr_c = chg_c_a2f(c);
+    else
+	curr_c = chg_c_a2s(c);
+
+    /* Sanity check -- curr_c should, in the future, never be 0.
+     * We should, in the future, insert a fatal error here. */
+    if (curr_c == NUL)
+	curr_c = c;
+
+    if (curr_c != c && ccp != NULL)
+    {
+	char_u buf[MB_MAXBYTES];
+
+	/* Update the first byte of the character. */
+	(*mb_char2bytes)(curr_c, buf);
+	*ccp = buf[0];
+    }
+
+    /* Return the shaped character */
+    return curr_c;
+}
+
+
+/*
+ * A_firstc_laa: return the first character of a LAA combination if it
+ * exists.  That is "c1" when the base character "c" is a_LAM and "c1" is
+ * present and is not a harakat character; otherwise zero is returned.
+ */
+    static int
+A_firstc_laa(c, c1)
+    int c;	/* base character */
+    int c1;	/* first composing character */
+{
+    /* Rule out everything that cannot be a combination first. */
+    if (c != a_LAM || c1 == NUL || A_is_harakat(c1))
+	return 0;
+
+    return c1;
+}
+
+
+/*
+ * A_is_harakat: return non-zero when 'c' is an Arabic Harakat character
+ *		(harakat/tanween), i.e. lies in the inclusive range
+ *		a_FATHATAN .. a_SUKUN.
+ */
+    static int
+A_is_harakat(c)
+    int c;
+{
+    int	    outside = (c < a_FATHATAN || c > a_SUKUN);
+
+    return !outside;
+}
+
+
+/*
+ * A_is_iso: return non-zero when 'c' is an Arabic ISO-8859-6 character
+ *		(alphabet/number/punctuation).  Accepted are the inclusive
+ *		ranges a_HAMZA .. a_GHAIN and a_TATWEEL .. a_HAMZA_BELOW,
+ *		plus the single character a_MINI_ALEF.
+ */
+    static int
+A_is_iso(c)
+    int c;
+{
+    int	    in_first_range = (c >= a_HAMZA && c <= a_GHAIN);
+    int	    in_second_range = (c >= a_TATWEEL && c <= a_HAMZA_BELOW);
+
+    return (in_first_range || in_second_range || c == a_MINI_ALEF);
+}
+
+
+/*
+ * A_is_formb: return non-zero when 'c' is an Arabic 10646-1 FormB character
+ *		(alphabet/number/punctuation).  Accepted are the inclusive
+ *		ranges a_s_FATHATAN .. a_s_DAMMATAN and
+ *		a_s_FATHA .. a_f_LAM_ALEF, plus the single characters
+ *		a_s_KASRATAN and a_BYTE_ORDER_MARK.
+ */
+    static int
+A_is_formb(c)
+    int c;
+{
+    int	    in_tanween_range = (c >= a_s_FATHATAN && c <= a_s_DAMMATAN);
+    int	    in_main_range = (c >= a_s_FATHA && c <= a_f_LAM_ALEF);
+
+    return (in_tanween_range
+	    || c == a_s_KASRATAN
+	    || in_main_range
+	    || c == a_BYTE_ORDER_MARK);
+}
+
+
+/*
+ * A_is_ok: return non-zero when 'c' is an Arabic 10646 character, meaning
+ *		that either A_is_iso() (8859-6) or A_is_formb() (Form-B)
+ *		accepts it.
+ */
+    static int
+A_is_ok(c)
+    int c;
+{
+    int	    ok = A_is_iso(c);
+
+    /* Only consult the Form-B test when the 8859-6 test failed. */
+    if (!ok)
+	ok = A_is_formb(c);
+
+    return ok;
+}
+
+
+/*
+ * A_is_valid: return non-zero when 'c' is an Arabic 10646 character
+ *		(8859-6 or Form-B, see A_is_ok()) that is not excluded by
+ *		A_is_special().
+ */
+    static int
+A_is_valid(c)
+    int c;
+{
+    if (!A_is_ok(c))
+	return FALSE;
+
+    return !A_is_special(c);
+}
+
+
+/*
+ * A_is_special: return non-zero when 'c' is a special Arabic character,
+ *		which here means a_HAMZA or a_s_HAMZA.
+ *		Specials don't adhere to most of the rules.
+ */
+    static int
+A_is_special(c)
+    int c;
+{
+    int	    ordinary = (c != a_HAMZA && c != a_s_HAMZA);
+
+    return !ordinary;
+}