Mercurial > vim
comparison runtime/tools/unicode.vim @ 2041:d5867fd6b2b7 v7.2.330
updated for version 7.2.330
Problem: Tables for Unicode case operators are outdated.
Solution: Add a Vim script for generating the tables. Include tables for
Unicode 5.2.
author | Bram Moolenaar <bram@zimbu.org> |
---|---|
date | Tue, 12 Jan 2010 19:52:03 +0100 |
parents | |
children | 1378bc45ebe5 |
comparison
equal
deleted
inserted
replaced
2040:70c67b1bb1f1 | 2041:d5867fd6b2b7 |
---|---|
1 " Script to extract tables from Unicode .txt files, to be used in src/mbyte.c. | |
2 " The format of the UnicodeData.txt file is explained here: | |
3 " http://www.unicode.org/Public/5.1.0/ucd/UCD.html | |
4 " For the other files see the header. | |
5 " | |
6 " Usage: Vim -S <this-file> | |
7 " | |
8 " Author: Bram Moolenaar | |
9 " Last Update: 2010 Jan 12 | |
10 | |
11 " Parse lines of UnicodeData.txt. Creates a list of lists in s:dataprops. | |
func! ParseDataToProps()
  " Reads every line of the current buffer, splits it on ';' (white space
  " around the separator is dropped, empty fields are kept) and stores the
  " resulting field lists in s:dataprops, one list per line.
  " Gives an error and stops at the first line that does not have exactly
  " 15 fields; lines collected before that one remain in s:dataprops.
  let s:dataprops = []
  let lnum = 0
  for text in getline(1, '$')
    let lnum += 1
    let fields = split(text, '\s*;\s*', 1)
    let nfields = len(fields)
    if nfields != 15
      echoerr 'Found ' . nfields . ' items in line ' . lnum . ', expected 15'
      return
    endif
    call add(s:dataprops, fields)
  endfor
endfunc
25 | |
26 " Parse lines of CaseFolding.txt. Creates a list of lists in s:foldprops. | |
func! ParseFoldProps()
  " Collects the entries of the current buffer in s:foldprops, one list of
  " fields per line.  Lines starting with '#' and blank lines are skipped.
  " The remaining lines are split on ';' (surrounding white space dropped,
  " empty fields kept) and must have exactly 4 fields; the first line that
  " does not causes an error and ends the parsing.
  let s:foldprops = []
  let lnum = 0
  for text in getline(1, '$')
    let lnum += 1
    if text =~ '^#' || text =~ '^\s*$'
      continue
    endif
    let fields = split(text, '\s*;\s*', 1)
    let nfields = len(fields)
    if nfields != 4
      echoerr 'Found ' . nfields . ' items in line ' . lnum . ', expected 4'
      return
    endif
    call add(s:foldprops, fields)
  endfor
endfunc
43 | |
44 " Parse lines of EastAsianWidth.txt. Creates a list of lists in s:widthprops. | |
func! ParseWidthProps()
  " Collects the entries of the current buffer in s:widthprops, one list of
  " fields per line.  Lines starting with '#' and blank lines are skipped.
  " The remaining lines are split on ';' (surrounding white space dropped,
  " empty fields kept) and must have exactly 2 fields; the first line that
  " does not causes an error and ends the parsing.
  let s:widthprops = []
  let lnum = 0
  for text in getline(1, '$')
    let lnum += 1
    if text =~ '^#' || text =~ '^\s*$'
      continue
    endif
    let fields = split(text, '\s*;\s*', 1)
    let nfields = len(fields)
    if nfields != 2
      echoerr 'Found ' . nfields . ' items in line ' . lnum . ', expected 2'
      return
    endif
    call add(s:widthprops, fields)
  endfor
endfunc
61 | |
62 " Build the toLower or toUpper table in a new buffer. | |
63 " Uses s:dataprops. | |
func! BuildCaseTable(name, index)
  " Builds the C table "to<name>" in a new buffer from s:dataprops.
  "   a:name   suffix of the table name and of the buffer name ("to" . name)
  "   a:index  index of the field in each s:dataprops entry that holds the
  "            hex code point the character maps to; entries where this
  "            field is empty are skipped
  " Consecutive mappings are merged into one range while the offset to the
  " mapped character stays the same and the distance between the code
  " points stays the same.  Each range is formatted by Range().
  " The previous window is made current again when done.
  let start = -1
  let end = -1
  let step = 0
  let add = -1
  let ranges = []
  for p in s:dataprops
    if p[a:index] ==# ''
      continue
    endif
    let n = ('0x' . p[0]) + 0
    let nl = ('0x' . p[a:index]) + 0
    if start >= 0 && add == nl - n && (step == 0 || n - end == step)
      " Continue with the same range.  The first continuation fixes the step.
      let step = n - end
      let end = n
    else
      if start >= 0
        " Produce the previous range.
        call Range(ranges, start, end, step, add)
      endif
      let start = n
      let end = n
      let step = 0
      let add = nl - n
    endif
  endfor
  if start >= 0
    call Range(ranges, start, end, step, add)
  endif

  " New buffer to put the result in.
  new
  exe "file to" . a:name
  call setline(1, "static convertStruct to" . a:name . "[] =")
  call setline(2, "{")
  if !empty(ranges)
    call append('$', ranges)
    " Remove the comma after the last entry.  Only done when there are
    " entries, otherwise the "{" line itself would be truncated.
    call setline('$', getline('$')[:-2])
  endif
  call setline(line('$') + 1, "};")
  wincmd p
endfunc
104 | |
105 " Build the foldCase table in a new buffer. | |
106 " Uses s:foldprops. | |
func! BuildFoldTable()
  " Builds the C table "foldCase" in a new buffer named "foldCase" from
  " s:foldprops.  Only entries whose second field is exactly 'C' or 'S' are
  " used; the first field is the hex code point and the third field the hex
  " code point it folds to.
  " Consecutive mappings are merged into one range while the offset to the
  " folded character stays the same and the distance between the code
  " points stays the same.  Each range is formatted by Range().
  " The previous window is made current again when done.
  let start = -1
  let end = -1
  let step = 0
  let add = -1
  let ranges = []
  for p in s:foldprops
    " Use "!=#" so that the match does not depend on 'ignorecase'.
    if p[1] !=# 'C' && p[1] !=# 'S'
      continue
    endif
    let n = ('0x' . p[0]) + 0
    let nl = ('0x' . p[2]) + 0
    if start >= 0 && add == nl - n && (step == 0 || n - end == step)
      " Continue with the same range.  The first continuation fixes the step.
      let step = n - end
      let end = n
    else
      if start >= 0
        " Produce the previous range.
        call Range(ranges, start, end, step, add)
      endif
      let start = n
      let end = n
      let step = 0
      let add = nl - n
    endif
  endfor
  if start >= 0
    call Range(ranges, start, end, step, add)
  endif

  " New buffer to put the result in.
  new
  file foldCase
  call setline(1, "static convertStruct foldCase[] =")
  call setline(2, "{")
  if !empty(ranges)
    call append('$', ranges)
    " Remove the comma after the last entry.  Only done when there are
    " entries, otherwise the "{" line itself would be truncated.
    call setline('$', getline('$')[:-2])
  endif
  call setline(line('$') + 1, "};")
  wincmd p
endfunc
147 | |
func! Range(ranges, start, end, step, add)
  " Appends one table entry "{start,end,step,add}," to the list a:ranges.
  " a:start and a:end are written in hex, the other two in decimal.
  " A step of zero is written as -1.
  if a:step == 0
    let step = -1
  else
    let step = a:step
  endif
  let entry = printf("\t{0x%x,0x%x,%d,%d},", a:start, a:end, step, a:add)
  call add(a:ranges, entry)
endfunc
152 | |
153 " Build the combining table. | |
154 " Uses s:dataprops. | |
func! BuildCombiningTable()
  " Builds the C table "combining" in a new buffer named "combining" from
  " s:dataprops.  An entry is used when its third field is exactly 'Mn',
  " 'Mc' or 'Me'; the first field is the hex code point.
  " Directly adjacent code points are merged into one {first, last} entry.
  " The previous window is made current again when done.
  let start = -1
  let end = -1
  let ranges = []
  for p in s:dataprops
    " index() matches case sensitively, independent of 'ignorecase'.
    if index(['Mn', 'Mc', 'Me'], p[2]) < 0
      continue
    endif
    let n = ('0x' . p[0]) + 0
    if start >= 0 && end + 1 == n
      " Continue with the same range.
      let end = n
    else
      if start >= 0
        " Produce the previous range.
        call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
      endif
      let start = n
      let end = n
    endif
  endfor
  if start >= 0
    call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
  endif

  " New buffer to put the result in.
  new
  file combining
  call setline(1, " static struct interval combining[] =")
  call setline(2, " {")
  if !empty(ranges)
    call append('$', ranges)
    " Remove the comma after the last entry.  Only done when there are
    " entries, otherwise the "{" line itself would be truncated.
    call setline('$', getline('$')[:-2])
  endif
  call setline(line('$') + 1, " };")
  wincmd p
endfunc
189 | |
190 " Build the ambiguous table in a new buffer. | |
191 " Uses s:widthprops and s:dataprops. | |
func! BuildAmbiguousTable()
  " Builds the C table "ambiguous" in a new buffer named "ambiguous".
  " Uses the entries of s:widthprops whose second field starts with 'A'.
  " The first field is either one hex code point or a range "first..last".
  " The first code point is looked up in s:dataprops; the entry is left out
  " when the third field found there is 'Mn', 'Mc' or 'Me'.
  " Directly adjacent code points are merged into one {first, last} entry.
  " The previous window is made current again when done.
  let start = -1
  let end = -1
  let ranges = []
  let dataidx = 0
  let datalen = len(s:dataprops)
  for p in s:widthprops
    " Use "!=#" so that the match does not depend on 'ignorecase'.
    if p[1][0] !=# 'A'
      continue
    endif
    let n = ('0x' . p[0]) + 0
    if p[0] =~ '\.\.'
      " A range: remember the last code point of it.
      let n_last = ('0x' . substitute(p[0], '.*\.\.', '', '')) + 0
    else
      let n_last = n
    endif

    " Find this char in the data table.  dataidx is never reset, which
    " assumes both tables are sorted by code point.
    while dataidx < datalen && ('0x' . s:dataprops[dataidx][0]) + 0 < n
      let dataidx += 1
    endwhile
    if dataidx >= datalen
      " Ran off the end of the data table, there is nothing to look at.
      echoerr "Cannot find character " . n . " in data table"
      continue
    endif
    let dp = s:dataprops[dataidx]
    if ('0x' . dp[0]) + 0 != n
      echoerr "Cannot find character " . n . " in data table"
    endif

    " Only use the char when it's not a composing char.
    if index(['Mn', 'Mc', 'Me'], dp[2]) >= 0
      continue
    endif
    if start < 0 || end + 1 != n
      if start >= 0
        " Produce the previous range.
        call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
      endif
      let start = n
    endif
    " Always extend to the last code point of this entry, also when a
    " "first..last" entry continues the current range.
    let end = n_last
  endfor
  if start >= 0
    call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
  endif

  " New buffer to put the result in.
  new
  file ambiguous
  call setline(1, " static struct interval ambiguous[] =")
  call setline(2, " {")
  if !empty(ranges)
    call append('$', ranges)
    " Remove the comma after the last entry.  Only done when there are
    " entries, otherwise the "{" line itself would be truncated.
    call setline('$', getline('$')[:-2])
  endif
  call setline(line('$') + 1, " };")
  wincmd p
endfunc
246 | |
247 | |
248 | |
249 " Edit the Unicode text file. Requires the netrw plugin. | |
edit http://unicode.org/Public/UNIDATA/UnicodeData.txt

" The buffer now holds UnicodeData.txt: turn it into s:dataprops, then
" produce the tables that are built from it, each in its own buffer.
call ParseDataToProps()
call BuildCaseTable('Lower', 13)
call BuildCaseTable('Upper', 12)
call BuildCombiningTable()

" Same for the case folding file (netrw plugin required): parse it into
" s:foldprops and produce the foldCase table.
edit http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
call ParseFoldProps()
call BuildFoldTable()

" And for the width file (netrw plugin required): parse it into
" s:widthprops and produce the ambiguous table.  This step also reads
" s:dataprops, thus it must come after parsing UnicodeData.txt.
edit http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
call ParseWidthProps()
call BuildAmbiguousTable()