changeset 8629:54ac275e3fc4 v7.4.1604

commit https://github.com/vim/vim/commit/3848e00e0177abdb31bc600234967863ec487233 Author: Bram Moolenaar <Bram@vim.org> Date: Sat Mar 19 18:42:29 2016 +0100 patch 7.4.1604 Problem: Although emoji characters are ambiguous width, best is to treat them as full width. Solution: Update the Unicode character tables. Add the 'emoji' options. (Yasuhiro Matsumoto)
author Christian Brabandt <cb@256bit.org>
date Sat, 19 Mar 2016 18:45:04 +0100
parents 43d8da45ac18
children 062d86d6ef92
files runtime/doc/options.txt runtime/optwin.vim runtime/tools/unicode.vim src/mbyte.c src/option.c src/option.h src/version.c
diffstat 7 files changed, 208 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/runtime/doc/options.txt
+++ b/runtime/doc/options.txt
@@ -1,4 +1,4 @@
-*options.txt*	For Vim version 7.4.  Last change: 2016 Mar 08
+*options.txt*	For Vim version 7.4.  Last change: 2016 Mar 19
 
 
 		  VIM REFERENCE MANUAL	  by Bram Moolenaar
@@ -69,7 +69,7 @@ 1. Setting options					*set-option* *E76
 :se[t] {option}:{value}
 			Set string or number option to {value}.
 			For numeric options the value can be given in decimal,
- 			hex (preceded with 0x) or octal (preceded with '0').
+			hex (preceded with 0x) or octal (preceded with '0').
 			The old value can be inserted by typing 'wildchar' (by
 			default this is a <Tab> or CTRL-E if 'compatible' is
 			set).  See |cmdline-completion|.
@@ -1002,7 +1002,7 @@ A jump table for the options with a shor
 	- The backup file will be created in the first directory in the list
 	  where this is possible.  The directory must exist, Vim will not
 	  create it for you.
-	- Empty means that no backup file will be created ( 'patchmode' is
+	- Empty means that no backup file will be created ('patchmode' is
 	  impossible!).  Writing may fail because of this.
 	- A directory "." means to put the backup file in the same directory
 	  as the edited file.
@@ -1792,7 +1792,7 @@ A jump table for the options with a shor
 	when CTRL-P or CTRL-N are used.  It is also used for whole-line
 	completion |i_CTRL-X_CTRL-L|.  It indicates the type of completion
 	and the places to scan.  It is a comma separated list of flags:
-	.	scan the current buffer ( 'wrapscan' is ignored)
+	.	scan the current buffer ('wrapscan' is ignored)
 	w	scan buffers from other windows
 	b	scan other loaded buffers that are in the buffer list
 	u	scan the unloaded buffers that are in the buffer list
@@ -2644,6 +2644,15 @@ A jump table for the options with a shor
 	also 'gdefault' option.
 	Switching this option on is discouraged!
 
+						*'emoji'* *'emo'*
+'emoji' 'emo'	boolean (default: on)
+			global
+			{not in Vi}
+			{only available when compiled with the |+multi_byte|
+			feature}
+	When on, all Unicode emoji characters are considered to be full width.
+
+
 					*'encoding'* *'enc'* *E543*
 'encoding' 'enc'	string (default: "latin1" or value from $LANG)
 			global
@@ -3732,7 +3741,7 @@ A jump table for the options with a shor
 		The same applies to the modeless selection.
 								*'go-P'*
 	  'P'	Like autoselect but using the "+ register instead of the "*
-	  	register.
+		register.
 								*'go-A'*
 	  'A'	Autoselect for the modeless selection.  Like 'a', but only
 		applies to the modeless selection.
@@ -4841,7 +4850,7 @@ A jump table for the options with a shor
 	reset this option. |-u| |--noplugin|
 
 						*'luadll'*
-'luadll' 		string	(default depends on the build)
+'luadll'		string	(default depends on the build)
 			global
 			{not in Vi}
 			{only available when compiled with the |+lua/dyn|
@@ -5298,7 +5307,7 @@ A jump table for the options with a shor
 	respectively; see |CTRL-A| for more info on these commands.
 	alpha	If included, single alphabetical characters will be
 		incremented or decremented.  This is useful for a list with a
-		letter index a), b), etc.	  	*octal-nrformats*
+		letter index a), b), etc.		*octal-nrformats*
 	octal	If included, numbers that start with a zero will be considered
 		to be octal.  Example: Using CTRL-A on "007" results in "010".
 	hex	If included, numbers starting with "0x" or "0X" will be
@@ -5328,7 +5337,7 @@ A jump table for the options with a shor
 	relative to the cursor.  Together with 'number' there are these
 	four combinations (cursor in line 3):
 
-              	'nonu'          'nu'            'nonu'          'nu'
+		'nonu'          'nu'            'nonu'          'nu'
 		'nornu'         'nornu'         'rnu'           'rnu'
 
 	    |apple          |  1 apple      |  2 apple      |  2 apple
@@ -5567,7 +5576,7 @@ A jump table for the options with a shor
 	this doesn't work when $INCL contains a comma or white space.
 
 						*'perldll'*
-'perldll' 		string	(default depends on the build)
+'perldll'		string	(default depends on the build)
 			global
 			{not in Vi}
 			{only available when compiled with the |+perl/dyn|
@@ -5704,7 +5713,7 @@ A jump table for the options with a shor
 	|ins-completion-menu|.
 
 						*'pythondll'*
-'pythondll' 		string	(default depends on the build)
+'pythondll'		string	(default depends on the build)
 			global
 			{not in Vi}
 			{only available when compiled with the |+python/dyn|
@@ -5945,7 +5954,7 @@ A jump table for the options with a shor
 	The 'rightleft' option must be set for 'rightleftcmd' to take effect.
 
 						*'rubydll'*
-'rubydll' 		string	(default: depends on the build)
+'rubydll'		string	(default: depends on the build)
 			global
 			{not in Vi}
 			{only available when compiled with the |+ruby/dyn|
@@ -7439,7 +7448,7 @@ A jump table for the options with a shor
 	mapping which should not change the tagstack.
 
 						*'tcldll'*
-'tcldll' 		string	(default depends on the build)
+'tcldll'		string	(default depends on the build)
 			global
 			{not in Vi}
 			{only available when compiled with the |+tcl/dyn|
@@ -7494,7 +7503,7 @@ A jump table for the options with a shor
 			{not in Vi}
 	Encoding used for the terminal.  This specifies what character
 	encoding the keyboard produces and the display will understand.  For
-	the GUI it only applies to the keyboard ( 'encoding' is used for the
+	the GUI it only applies to the keyboard ('encoding' is used for the
 	display).  Except for the Mac when 'macatsui' is off, then
 	'termencoding' should be "macroman".
 								*E617*
--- a/runtime/optwin.vim
+++ b/runtime/optwin.vim
@@ -1254,6 +1254,8 @@ if has("multi_byte")
   endif
   call append("$", "ambiwidth\twidth of ambiguous width characters")
   call <SID>OptionG("ambw", &ambw)
+  call append("$", "emoji\temoji characters are full width")
+  call <SID>BinOptionG("emo", &emo)
 endif
 
 
--- a/runtime/tools/unicode.vim
+++ b/runtime/tools/unicode.vim
@@ -251,6 +251,27 @@ func! BuildWidthTable(pattern, tableName
   wincmd p
 endfunc
 
+" Build the emoji width table in a new buffer.
+func! BuildEmojiTable(pattern, tableName)
+  let ranges = []
+  for line in map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~a:pattern'), 'matchstr(v:val,"^\\S\\+")')
+    let token = split(line, '\.\.')
+    if len(token) == 1
+      call add(token, token[0])
+    endif
+    call add(ranges, printf("\t{0x%04x, 0x%04x},", "0x".token[0], "0x".token[1]))
+  endfor
+
+  " New buffer to put the result in.
+  new
+  exe "file " . a:tableName
+  call setline(1, "    static struct interval " . a:tableName . "[] =")
+  call setline(2, "    {")
+  call append('$', ranges)
+  call setline('$', getline('$')[:-2])  " remove last comma
+  call setline(line('$') + 1, "    };")
+  wincmd p
+endfunc
 
 " Try to avoid hitting E36
 set equalalways
@@ -290,3 +311,9 @@ call BuildWidthTable('[WF]', 'doublewidt
 
 " Build the ambiguous width table.
 call BuildWidthTable('A', 'ambiguous')
+
+" Edit the emoji text file.  Requires the netrw plugin.
+edit http://www.unicode.org/Public/emoji/3.0/emoji-data.txt
+
+" Build the emoji table. Ver. 1.0 - 6.0
+call BuildEmojiTable('; Emoji\s\+# [1-6]\.[0-9]', 'emoji')
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -1253,11 +1253,6 @@ utf_char2cells(int c)
 	{0xfe68, 0xfe6b},
 	{0xff01, 0xff60},
 	{0xffe0, 0xffe6},
-	{0x1b000, 0x1b001},
-	{0x1f200, 0x1f202},
-	{0x1f210, 0x1f23a},
-	{0x1f240, 0x1f248},
-	{0x1f250, 0x1f251},
 	{0x20000, 0x2fffd},
 	{0x30000, 0x3fffd}
     };
@@ -1441,6 +1436,148 @@ utf_char2cells(int c)
 	{0x100000, 0x10fffd}
     };
 
+    /* Sorted list of non-overlapping intervals of Emoji characters,
+     * based on http://unicode.org/emoji/charts/emoji-list.html */
+    static struct interval emoji[] =
+    {
+	{0x203c, 0x203c},
+	{0x2049, 0x2049},
+	{0x2122, 0x2122},
+	{0x2139, 0x2139},
+	{0x2194, 0x2199},
+	{0x21a9, 0x21aa},
+	{0x231a, 0x231b},
+	{0x2328, 0x2328},
+	{0x23cf, 0x23cf},
+	{0x23e9, 0x23f3},
+	{0x24c2, 0x24c2},
+	{0x25aa, 0x25ab},
+	{0x25b6, 0x25b6},
+	{0x25c0, 0x25c0},
+	{0x25fb, 0x25fe},
+	{0x2600, 0x2604},
+	{0x260e, 0x260e},
+	{0x2611, 0x2611},
+	{0x2614, 0x2615},
+	{0x2618, 0x2618},
+	{0x261d, 0x261d},
+	{0x2620, 0x2620},
+	{0x2622, 0x2623},
+	{0x2626, 0x2626},
+	{0x262a, 0x262a},
+	{0x262e, 0x262f},
+	{0x2638, 0x263a},
+	{0x2648, 0x2653},
+	{0x2660, 0x2660},
+	{0x2663, 0x2663},
+	{0x2665, 0x2666},
+	{0x2668, 0x2668},
+	{0x267b, 0x267b},
+	{0x267f, 0x267f},
+	{0x2692, 0x2694},
+	{0x2696, 0x2697},
+	{0x2699, 0x2699},
+	{0x269b, 0x269c},
+	{0x26a0, 0x26a1},
+	{0x26aa, 0x26ab},
+	{0x26b0, 0x26b1},
+	{0x26bd, 0x26be},
+	{0x26c4, 0x26c5},
+	{0x26c8, 0x26c8},
+	{0x26ce, 0x26ce},
+	{0x26cf, 0x26cf},
+	{0x26d1, 0x26d1},
+	{0x26d3, 0x26d4},
+	{0x26e9, 0x26ea},
+	{0x26f0, 0x26f5},
+	{0x26f7, 0x26fa},
+	{0x26fd, 0x26fd},
+	{0x2702, 0x2702},
+	{0x2705, 0x2705},
+	{0x2708, 0x2709},
+	{0x270a, 0x270b},
+	{0x270c, 0x270d},
+	{0x270f, 0x270f},
+	{0x2712, 0x2712},
+	{0x2714, 0x2714},
+	{0x2716, 0x2716},
+	{0x271d, 0x271d},
+	{0x2721, 0x2721},
+	{0x2728, 0x2728},
+	{0x2733, 0x2734},
+	{0x2744, 0x2744},
+	{0x2747, 0x2747},
+	{0x274c, 0x274c},
+	{0x274e, 0x274e},
+	{0x2753, 0x2755},
+	{0x2757, 0x2757},
+	{0x2763, 0x2764},
+	{0x2795, 0x2797},
+	{0x27a1, 0x27a1},
+	{0x27b0, 0x27b0},
+	{0x27bf, 0x27bf},
+	{0x2934, 0x2935},
+	{0x2b05, 0x2b07},
+	{0x2b1b, 0x2b1c},
+	{0x2b50, 0x2b50},
+	{0x2b55, 0x2b55},
+	{0x3030, 0x3030},
+	{0x303d, 0x303d},
+	{0x3297, 0x3297},
+	{0x3299, 0x3299},
+	{0x1f004, 0x1f004},
+	{0x1f0cf, 0x1f0cf},
+	{0x1f170, 0x1f171},
+	{0x1f17e, 0x1f17e},
+	{0x1f17f, 0x1f17f},
+	{0x1f18e, 0x1f18e},
+	{0x1f191, 0x1f19a},
+	{0x1f1e6, 0x1f1ff},
+	{0x1f201, 0x1f202},
+	{0x1f21a, 0x1f21a},
+	{0x1f22f, 0x1f22f},
+	{0x1f232, 0x1f23a},
+	{0x1f250, 0x1f251},
+	{0x1f300, 0x1f320},
+	{0x1f330, 0x1f335},
+	{0x1f337, 0x1f37c},
+	{0x1f380, 0x1f393},
+	{0x1f3a0, 0x1f3c4},
+	{0x1f3c6, 0x1f3ca},
+	{0x1f3e0, 0x1f3f0},
+	{0x1f400, 0x1f43e},
+	{0x1f440, 0x1f440},
+	{0x1f442, 0x1f4f7},
+	{0x1f4f9, 0x1f4fc},
+	{0x1f500, 0x1f53d},
+	{0x1f550, 0x1f567},
+	{0x1f5fb, 0x1f5ff},
+	{0x1f600, 0x1f600},
+	{0x1f601, 0x1f610},
+	{0x1f611, 0x1f611},
+	{0x1f612, 0x1f614},
+	{0x1f615, 0x1f615},
+	{0x1f616, 0x1f616},
+	{0x1f617, 0x1f617},
+	{0x1f618, 0x1f618},
+	{0x1f619, 0x1f619},
+	{0x1f61a, 0x1f61a},
+	{0x1f61b, 0x1f61b},
+	{0x1f61c, 0x1f61e},
+	{0x1f61f, 0x1f61f},
+	{0x1f620, 0x1f625},
+	{0x1f626, 0x1f627},
+	{0x1f628, 0x1f62b},
+	{0x1f62c, 0x1f62c},
+	{0x1f62d, 0x1f62d},
+	{0x1f62e, 0x1f62f},
+	{0x1f630, 0x1f633},
+	{0x1f634, 0x1f634},
+	{0x1f635, 0x1f640},
+	{0x1f645, 0x1f64f},
+	{0x1f680, 0x1f6c5}
+    };
+
     if (c >= 0x100)
     {
 #ifdef USE_WCHAR_FUNCTIONS
@@ -1460,6 +1597,8 @@ utf_char2cells(int c)
 	if (intable(doublewidth, sizeof(doublewidth), c))
 	    return 2;
 #endif
+	if (p_emoji && intable(emoji, sizeof(emoji), c))
+	    return 2;
     }
 
     /* Characters below 0x100 are influenced by 'isprint' option */
--- a/src/option.c
+++ b/src/option.c
@@ -1051,6 +1051,15 @@ static struct vimoption options[] =
     {"edcompatible","ed",   P_BOOL|P_VI_DEF,
 			    (char_u *)&p_ed, PV_NONE,
 			    {(char_u *)FALSE, (char_u *)0L} SCRIPTID_INIT},
+    {"emoji",  "emo",	    P_BOOL|P_VI_DEF|P_RCLR,
+#if defined(FEAT_MBYTE)
+			    (char_u *)&p_emoji, PV_NONE,
+			    {(char_u *)TRUE, (char_u *)0L}
+#else
+			    (char_u *)NULL, PV_NONE,
+			    {(char_u *)0L, (char_u *)0L}
+#endif
+			    SCRIPTID_INIT},
     {"encoding",    "enc",  P_STRING|P_VI_DEF|P_RCLR|P_NO_ML,
 #ifdef FEAT_MBYTE
 			    (char_u *)&p_enc, PV_NONE,
@@ -5986,7 +5995,7 @@ did_set_string_option(
 
     /* 'ambiwidth' */
 #ifdef FEAT_MBYTE
-    else if (varp == &p_ambw)
+    else if (varp == &p_ambw || (int *)varp == &p_emoji)
     {
 	if (check_opt_strings(p_ambw, p_ambw_values, FALSE) != OK)
 	    errmsg = e_invarg;
--- a/src/option.h
+++ b/src/option.h
@@ -318,6 +318,7 @@ EXTERN int	p_acd;		/* 'autochdir' */
 #endif
 #ifdef FEAT_MBYTE
 EXTERN char_u	*p_ambw;	/* 'ambiwidth' */
+EXTERN int	p_emoji;	/* 'emoji' */
 #endif
 #if defined(FEAT_GUI) && defined(MACOS_X)
 EXTERN int	*p_antialias;	/* 'antialias' */
--- a/src/version.c
+++ b/src/version.c
@@ -749,6 +749,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1604,
+/**/
     1603,
 /**/
     1602,