Mercurial > vim
annotate runtime/tools/unicode.vim @ 7539:f5d3b2c6f971
Added tag v7.4.1070 for changeset c9fc24b7629385e94b7479146e610fc7c28d07a3
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Sat, 09 Jan 2016 19:45:05 +0100 |
parents | c9a5d51c9161 |
children | 54ac275e3fc4 |
rev | line source |
---|---|
2041 | 1 " Script to extract tables from Unicode .txt files, to be used in src/mbyte.c. |
2 " The format of the UnicodeData.txt file is explained here: | |
3 " http://www.unicode.org/Public/5.1.0/ucd/UCD.html | |
4 " For the other files see the header. | |
5 " | |
6 " Usage: Vim -S <this-file> | |
7 " | |
8 " Author: Bram Moolenaar | |
9 " Last Update: 2010 Jan 12 | |
10 | |
11 " Parse lines of UnicodeData.txt. Creates a list of lists in s:dataprops. | |
func! ParseDataToProps()
  " Split every line of the current buffer on ';' (surrounding white space is
  " dropped, empty fields are kept) and collect the resulting lists in
  " s:dataprops.  Every line must have exactly 15 fields; at the first line
  " that does not, an error is reported and parsing stops, leaving the lines
  " parsed so far in s:dataprops.
  let s:dataprops = []
  let lnum = 0
  for text in getline(1, '$')
    let lnum += 1
    let fields = split(text, '\s*;\s*', 1)
    if len(fields) != 15
      echoerr 'Found ' . len(fields) . ' items in line ' . lnum . ', expected 15'
      return
    endif
    call add(s:dataprops, fields)
  endfor
endfunc
25 | |
26 " Parse lines of CaseFolding.txt. Creates a list of lists in s:foldprops. | |
func! ParseFoldProps()
  " Parse the current buffer into s:foldprops, one list of fields per line.
  " Lines starting with '#' and lines holding only white space are ignored.
  " Every other line is split on ';' and must give exactly 4 fields; at the
  " first line that does not, an error is reported and parsing stops.
  let s:foldprops = []
  let lnum = 0
  for text in getline(1, '$')
    let lnum += 1
    if text =~ '^#' || text =~ '^\s*$'
      " Comment or blank line: nothing to store.
      continue
    endif
    let fields = split(text, '\s*;\s*', 1)
    if len(fields) != 4
      echoerr 'Found ' . len(fields) . ' items in line ' . lnum . ', expected 4'
      return
    endif
    call add(s:foldprops, fields)
  endfor
endfunc
43 | |
44 " Parse lines of EastAsianWidth.txt. Creates a list of lists in s:widthprops. | |
func! ParseWidthProps()
  " Parse the current buffer into s:widthprops, one list of fields per line.
  " Lines starting with '#' and lines holding only white space are ignored.
  " Every other line is split on ';' and must give exactly 2 fields; at the
  " first line that does not, an error is reported and parsing stops.
  let s:widthprops = []
  let lnum = 0
  for text in getline(1, '$')
    let lnum += 1
    if text =~ '^#' || text =~ '^\s*$'
      " Comment or blank line: nothing to store.
      continue
    endif
    let fields = split(text, '\s*;\s*', 1)
    if len(fields) != 2
      echoerr 'Found ' . len(fields) . ' items in line ' . lnum . ', expected 2'
      return
    endif
    call add(s:widthprops, fields)
  endfor
endfunc
61 | |
62 " Build the toLower or toUpper table in a new buffer. | |
63 " Uses s:dataprops. | |
func! BuildCaseTable(name, index)
  " Build the "to" . a:name table (toLower or toUpper) in a new buffer.
  "   a:name   suffix of the table and buffer name, e.g. "Lower"
  "   a:index  index of the field in each s:dataprops entry that holds the
  "            mapped code point in hex; entries where it is empty are
  "            skipped
  " Consecutive entries with the same offset (mapped - original) and the same
  " distance between code points are merged into one range.
  " Uses s:dataprops.  Returns to the previous window when done.
  let start = -1
  let end = -1
  let step = 0
  let add = -1
  let ranges = []
  for p in s:dataprops
    if p[a:index] == ''
      continue
    endif
    let n = str2nr(p[0], 16)
    let nl = str2nr(p[a:index], 16)
    if start >= 0 && add == nl - n && (step == 0 || n - end == step)
      " Same offset and same stride: extend the current range.
      let step = n - end
      let end = n
    else
      if start >= 0
        " Flush the range collected so far.
        call Range(ranges, start, end, step, add)
      endif
      let start = n
      let end = n
      let step = 0
      let add = nl - n
    endif
  endfor
  if start >= 0
    call Range(ranges, start, end, step, add)
  endif

  " New buffer to put the result in.
  new
  exe "file to" . a:name
  call setline(1, "static convertStruct to" . a:name . "[] =")
  call setline(2, "{")
  if !empty(ranges)
    call append('$', ranges)
    " Remove the comma after the last entry.  Only done when there is an
    " entry, otherwise the "{" line would be truncated instead.
    call setline('$', getline('$')[:-2])
  endif
  call setline(line('$') + 1, "};")
  wincmd p
endfunc
104 | |
105 " Build the foldCase table in a new buffer. | |
106 " Uses s:foldprops. | |
func! BuildFoldTable()
  " Build the foldCase table in a new buffer named "foldCase".
  " Only s:foldprops entries whose second field is 'C' or 'S' are used; the
  " first field is the code point and the third the folded code point, both
  " in hex.  Consecutive entries with the same offset (folded - original) and
  " the same distance between code points are merged into one range.
  " Uses s:foldprops.  Returns to the previous window when done.
  let start = -1
  let end = -1
  let step = 0
  let add = -1
  let ranges = []
  for p in s:foldprops
    " Compare with ==# so the result does not depend on 'ignorecase'.
    if p[1] !=# 'C' && p[1] !=# 'S'
      continue
    endif
    let n = str2nr(p[0], 16)
    let nl = str2nr(p[2], 16)
    if start >= 0 && add == nl - n && (step == 0 || n - end == step)
      " Same offset and same stride: extend the current range.
      let step = n - end
      let end = n
    else
      if start >= 0
        " Flush the range collected so far.
        call Range(ranges, start, end, step, add)
      endif
      let start = n
      let end = n
      let step = 0
      let add = nl - n
    endif
  endfor
  if start >= 0
    call Range(ranges, start, end, step, add)
  endif

  " New buffer to put the result in.
  new
  file foldCase
  call setline(1, "static convertStruct foldCase[] =")
  call setline(2, "{")
  if !empty(ranges)
    call append('$', ranges)
    " Remove the comma after the last entry.  Only done when there is an
    " entry, otherwise the "{" line would be truncated instead.
    call setline('$', getline('$')[:-2])
  endif
  call setline(line('$') + 1, "};")
  wincmd p
endfunc
147 | |
func! Range(ranges, start, end, step, add)
  " Append one table entry "{start,end,step,add}," to the list a:ranges.
  " a:start and a:end are written in hex, the other two in decimal.
  " A step of zero (the range never got a second entry) is written as -1.
  let stepval = a:step
  if stepval == 0
    let stepval = -1
  endif
  let entry = printf("\t{0x%x,0x%x,%d,%d},", a:start, a:end, stepval, a:add)
  call add(a:ranges, entry)
endfunc
152 | |
153 " Build the combining table. | |
154 " Uses s:dataprops. | |
func! BuildCombiningTable()
  " Build the combining table in a new buffer named "combining".
  " Collects the code points (first field, hex) of all s:dataprops entries
  " whose third field is 'Mn', 'Mc' or 'Me' and merges directly adjacent code
  " points into {first, last} intervals.
  " Uses s:dataprops.  Returns to the previous window when done.
  let start = -1
  let end = -1
  let ranges = []
  for p in s:dataprops
    " Compare with ==# so the result does not depend on 'ignorecase'.
    if p[2] !=# 'Mn' && p[2] !=# 'Mc' && p[2] !=# 'Me'
      continue
    endif
    let n = str2nr(p[0], 16)
    if start >= 0 && end + 1 == n
      " Directly follows the previous one: extend the current interval.
      let end = n
    else
      if start >= 0
        " Flush the interval collected so far.
        call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
      endif
      let start = n
      let end = n
    endif
  endfor
  if start >= 0
    call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
  endif

  " New buffer to put the result in.
  new
  file combining
  call setline(1, " static struct interval combining[] =")
  call setline(2, " {")
  if !empty(ranges)
    call append('$', ranges)
    " Remove the comma after the last entry.  Only done when there is an
    " entry, otherwise the "{" line would be truncated instead.
    call setline('$', getline('$')[:-2])
  endif
  call setline(line('$') + 1, " };")
  wincmd p
endfunc
189 | |
2063
1378bc45ebe5
updated for version 7.2.348
Bram Moolenaar <bram@zimbu.org>
parents:
2041
diff
changeset
|
190 " Build the double width or ambiguous width table in a new buffer. |
2041 | 191 " Uses s:widthprops and s:dataprops. |
2063
1378bc45ebe5
updated for version 7.2.348
Bram Moolenaar <bram@zimbu.org>
parents:
2041
diff
changeset
|
func! BuildWidthTable(pattern, tableName)
  " Build a table of {first, last} intervals in a new buffer.
  "   a:pattern    regexp matched against the first character of the second
  "                field of each s:widthprops entry, e.g. '[WF]' or 'A'
  "   a:tableName  name of the generated table and of the new buffer
  " The first field of an entry is a single code point or a "first..last"
  " range, in hex.  A single code point is left out when s:dataprops marks it
  " as 'Mn', 'Mc' or 'Me'; ranges are always used completely.  Adjacent
  " results are merged into one interval.
  " Uses s:widthprops and s:dataprops.  Returns to the previous window when
  " done.
  let start = -1
  let end = -1
  let ranges = []
  let dataidx = 0
  let datalen = len(s:dataprops)
  for p in s:widthprops
    " Match with !~# so the result does not depend on 'ignorecase'.
    if p[1][0] !~# a:pattern
      continue
    endif
    if p[0] =~ '\.\.'
      " It is a range.  We don't check for composing chars then.
      let rng = split(p[0], '\.\.')
      if len(rng) != 2
        echoerr "Cannot parse range: '" . p[0] . "' in width table"
        " Skip this entry: rng[1] may not exist and the values of a
        " previous entry must not be reused.
        continue
      endif
      let n = str2nr(rng[0], 16)
      let n_last = str2nr(rng[1], 16)
    else
      let n = str2nr(p[0], 16)
      let n_last = n
    endif

    " Find this char in the data table.  dataidx is never reset, so this
    " assumes both lists are in ascending code point order.  Stop at the end
    " of the list instead of indexing past it.
    while dataidx < datalen && str2nr(s:dataprops[dataidx][0], 16) < n
      let dataidx += 1
    endwhile
    let found = dataidx < datalen && str2nr(s:dataprops[dataidx][0], 16) == n
    if !found && n_last == n
      echoerr "Cannot find character " . n . " in data table"
    endif

    " Only use the char when it's not a composing char.
    " But use all chars from a range.
    " When the char is not in the data table there is no category to check;
    " do not use the category of the next entry for it.
    let category = found ? s:dataprops[dataidx][2] : ''
    if n_last > n || (category !=# 'Mn' && category !=# 'Mc' && category !=# 'Me')
      if start < 0 || end + 1 != n
        if start >= 0
          " Not adjacent to the current interval: flush it first.
          call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
        endif
        let start = n
      endif
      let end = n_last
    endif
  endfor
  if start >= 0
    call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
  endif

  " New buffer to put the result in.
  new
  exe "file " . a:tableName
  call setline(1, " static struct interval " . a:tableName . "[] =")
  call setline(2, " {")
  if !empty(ranges)
    call append('$', ranges)
    " Remove the comma after the last entry.  Only done when there is an
    " entry, otherwise the "{" line would be truncated instead.
    call setline('$', getline('$')[:-2])
  endif
  call setline(line('$') + 1, " };")
  wincmd p
endfunc
253 | |
254 | |
6864 | 255 " Try to avoid hitting E36 |
set equalalways

" --- UnicodeData.txt: case conversion and combining tables ---
" Load the file into the current window.  Requires the netrw plugin.
edit http://unicode.org/Public/UNIDATA/UnicodeData.txt

" Turn the buffer lines into s:dataprops.
call ParseDataToProps()

" toLower uses field 13, toUpper uses field 12.
for [s:caseName, s:caseIndex] in [["Lower", 13], ["Upper", 12]]
  call BuildCaseTable(s:caseName, s:caseIndex)
endfor
unlet s:caseName s:caseIndex

" Ranges of composing chars.
call BuildCombiningTable()

" --- CaseFolding.txt: foldCase table ---
" Load the file into the current window.  Requires the netrw plugin.
edit http://www.unicode.org/Public/UNIDATA/CaseFolding.txt

" Turn the buffer lines into s:foldprops.
call ParseFoldProps()

call BuildFoldTable()

" --- EastAsianWidth.txt: double width and ambiguous width tables ---
" Load the file into the current window.  Requires the netrw plugin.
edit http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt

" Turn the buffer lines into s:widthprops.
call ParseWidthProps()

" First the double width table, then the ambiguous width table.
for [s:widthPattern, s:widthTable] in [['[WF]', 'doublewidth'], ['A', 'ambiguous']]
  call BuildWidthTable(s:widthPattern, s:widthTable)
endfor
unlet s:widthPattern s:widthTable