Mercurial > vim
annotate src/os_mac_conv.c @ 31235:7fb4e244b16e v9.0.0951
patch 9.0.0951: trying every character position for a match is inefficient
Commit: https://github.com/vim/vim/commit/01105b37a108022515d364201767f7f111ec4222
Author: Bram Moolenaar <Bram@vim.org>
Date: Sat Nov 26 11:47:10 2022 +0000
patch 9.0.0951: trying every character position for a match is inefficient
Problem: Trying every character position for a match is inefficient.
Solution: Use the start position of the match ignoring "\zs".
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Sat, 26 Nov 2022 13:00:05 +0100 |
parents | 029c59bf78f1 |
children | 3365a601e73b |
rev | line source |
---|---|
10042
4aead6a9b7a9
commit https://github.com/vim/vim/commit/edf3f97ae2af024708ebb4ac614227327033ca47
Christian Brabandt <cb@256bit.org>
parents:
10025
diff
changeset
|
1 /* vi:set ts=8 sts=4 sw=4 noet: |
18 | 2 * |
3 * VIM - Vi IMproved by Bram Moolenaar | |
4 * | |
5 * Do ":help uganda" in Vim to read copying and usage conditions. | |
6 * Do ":help credits" in Vim to see a list of people who contributed. | |
7 * See README.txt for an overview of the Vim source code. | |
8 */ | |
9 /* | |
10 * os_mac_conv.c: Code specifically for Mac string conversions. | |
11 * | |
12 * This code has been put in a separate file to avoid the conflicts that are | |
13 * caused by including both the X11 and Carbon header files. | |
14 */ | |
15 | |
16 #define NO_X11_INCLUDES | |
2891 | 17 |
18 | 18 #include "vim.h" |
7811
7fda54504fee
commit https://github.com/vim/vim/commit/3e96c3d241ab657cf4df0913ea8de50a6cb90730
Christian Brabandt <cb@256bit.org>
parents:
7805
diff
changeset
|
19 |
21745
35921b7fc07a
patch 8.2.1422: the Mac GUI implementation is outdated
Bram Moolenaar <Bram@vim.org>
parents:
18810
diff
changeset
|
20 #if !defined(PROTO) |
2309
543ea69d037f
Add clipboard support in Mac console. (Bjorn Winckler)
Bram Moolenaar <bram@vim.org>
parents:
1621
diff
changeset
|
21 # include <CoreServices/CoreServices.h> |
543ea69d037f
Add clipboard support in Mac console. (Bjorn Winckler)
Bram Moolenaar <bram@vim.org>
parents:
1621
diff
changeset
|
22 #endif |
543ea69d037f
Add clipboard support in Mac console. (Bjorn Winckler)
Bram Moolenaar <bram@vim.org>
parents:
1621
diff
changeset
|
23 |
18 | 24 |
766 | 25 #if defined(MACOS_CONVERT) || defined(PROTO) |
2309
543ea69d037f
Add clipboard support in Mac console. (Bjorn Winckler)
Bram Moolenaar <bram@vim.org>
parents:
1621
diff
changeset
|
26 |
# ifdef PROTO
// A few dummy types to be able to generate function prototypes.
// They are only used when PROTO is defined, in which case the
// CoreServices header is not included.
typedef int UniChar;
typedef int *TECObjectRef;
typedef int CFStringRef;
# endif

// Helpers local to this file, defined near the end of it.
static char_u *mac_utf16_to_utf8(UniChar *from, size_t fromLen, size_t *actualLen);
static UniChar *mac_utf8_to_utf16(char_u *from, size_t fromLen, size_t *actualLen);

// Converter for composing decomposed HFS+ file paths.
// Created by mac_conv_init(), disposed by mac_conv_cleanup(); NULL when the
// converter could not be created.
static TECObjectRef gPathConverter;
// Converter used by mac_utf16_to_utf8.
// Created by mac_conv_init(), disposed by mac_conv_cleanup(); NULL when the
// converter could not be created.
static TECObjectRef gUTF16ToUTF8Converter;
41 | |
18 | 42 /* |
43 * A Mac version of string_convert_ext() for special cases. | |
44 */ | |
45 char_u * | |
7833
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
46 mac_string_convert( |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
47 char_u *ptr, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
48 int len, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
49 int *lenp, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
50 int fail_on_error, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
51 int from_enc, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
52 int to_enc, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
53 int *unconvlenp) |
18 | 54 { |
55 char_u *retval, *d; | |
56 CFStringRef cfstr; | |
57 int buflen, in, out, l, i; | |
58 CFStringEncoding from; | |
59 CFStringEncoding to; | |
60 | |
61 switch (from_enc) | |
62 { | |
63 case 'l': from = kCFStringEncodingISOLatin1; break; | |
64 case 'm': from = kCFStringEncodingMacRoman; break; | |
65 case 'u': from = kCFStringEncodingUTF8; break; | |
66 default: return NULL; | |
67 } | |
68 switch (to_enc) | |
69 { | |
70 case 'l': to = kCFStringEncodingISOLatin1; break; | |
71 case 'm': to = kCFStringEncodingMacRoman; break; | |
72 case 'u': to = kCFStringEncodingUTF8; break; | |
73 default: return NULL; | |
74 } | |
75 | |
76 if (unconvlenp != NULL) | |
77 *unconvlenp = 0; | |
78 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); | |
79 | |
3143 | 80 if (cfstr == NULL) |
168 | 81 fprintf(stderr, "Encoding failed\n"); |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
82 // When conversion failed, try excluding bytes from the end, helps when |
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
83 // there is an incomplete byte sequence. Only do up to 6 bytes to avoid |
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
84 // looping a long time when there really is something unconvertible. |
18 | 85 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6) |
86 { | |
87 --len; | |
88 ++*unconvlenp; | |
89 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); | |
90 } | |
91 if (cfstr == NULL) | |
92 return NULL; | |
168 | 93 |
18 | 94 if (to == kCFStringEncodingUTF8) |
95 buflen = len * 6 + 1; | |
96 else | |
97 buflen = len + 1; | |
98 retval = alloc(buflen); | |
99 if (retval == NULL) | |
100 { | |
101 CFRelease(cfstr); | |
102 return NULL; | |
103 } | |
168 | 104 |
105 #if 0 | |
106 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr)); | |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
107 // Determine output buffer size |
168 | 108 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen); |
109 retval = (buflen > 0) ? alloc(buflen) : NULL; | |
110 if (retval == NULL) { | |
111 CFRelease(cfstr); | |
112 return NULL; | |
113 } | |
114 | |
115 if (lenp) | |
116 *lenp = buflen / sizeof(char_u); | |
117 | |
118 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL)) | |
119 #endif | |
501 | 120 if (!CFStringGetCString(cfstr, (char *)retval, buflen, to)) |
18 | 121 { |
122 CFRelease(cfstr); | |
123 if (fail_on_error) | |
124 { | |
125 vim_free(retval); | |
126 return NULL; | |
127 } | |
128 | |
168 | 129 fprintf(stderr, "Trying char-by-char conversion...\n"); |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
130 // conversion failed for the whole string, but maybe it will work |
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
131 // for each character |
18 | 132 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;) |
133 { | |
134 if (from == kCFStringEncodingUTF8) | |
474 | 135 l = utf_ptr2len(ptr + in); |
18 | 136 else |
137 l = 1; | |
138 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0); | |
139 if (cfstr == NULL) | |
140 { | |
141 *d++ = '?'; | |
142 out++; | |
143 } | |
144 else | |
145 { | |
501 | 146 if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to)) |
18 | 147 { |
148 *d++ = '?'; | |
149 out++; | |
150 } | |
151 else | |
152 { | |
501 | 153 i = STRLEN(d); |
18 | 154 d += i; |
155 out += i; | |
156 } | |
157 CFRelease(cfstr); | |
158 } | |
159 in += l; | |
160 } | |
161 *d = NUL; | |
162 if (lenp != NULL) | |
163 *lenp = out; | |
164 return retval; | |
165 } | |
166 CFRelease(cfstr); | |
167 if (lenp != NULL) | |
501 | 168 *lenp = STRLEN(retval); |
168 | 169 |
18 | 170 return retval; |
171 } | |
172 | |
173 /* | |
174 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using | |
175 * standard Carbon framework. | |
176 * Input: "ptr[*sizep]". | |
177 * "real_size" is the size of the buffer that "ptr" points to. | |
178 * output is in-place, "sizep" is adjusted. | |
179 * Returns OK or FAIL. | |
180 */ | |
181 int | |
7833
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
182 macroman2enc( |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
183 char_u *ptr, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
184 long *sizep, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
185 long real_size) |
18 | 186 { |
187 CFStringRef cfstr; | |
188 CFRange r; | |
189 CFIndex len = *sizep; | |
190 | |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
191 // MacRoman is an 8-bit encoding, no need to move bytes to |
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
192 // conv_rest[]. |
18 | 193 cfstr = CFStringCreateWithBytes(NULL, ptr, len, |
194 kCFStringEncodingMacRoman, 0); | |
195 /* | |
196 * If there is a conversion error, try using another | |
197 * conversion. | |
198 */ | |
199 if (cfstr == NULL) | |
200 return FAIL; | |
201 | |
202 r.location = 0; | |
203 r.length = CFStringGetLength(cfstr); | |
204 if (r.length != CFStringGetBytes(cfstr, r, | |
205 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, | |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
206 0, // no lossy conversion |
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
207 0, // not external representation |
18 | 208 ptr + *sizep, real_size - *sizep, &len)) |
209 { | |
210 CFRelease(cfstr); | |
211 return FAIL; | |
212 } | |
213 CFRelease(cfstr); | |
214 mch_memmove(ptr, ptr + *sizep, len); | |
215 *sizep = len; | |
216 | |
217 return OK; | |
218 } | |
219 | |
220 /* | |
221 * Conversion from UTF-8 or latin1 to MacRoman. | |
222 * Input: "from[fromlen]" | |
223 * Output: "to[maxtolen]" length in "*tolenp" | |
224 * Unconverted rest in rest[*restlenp]. | |
225 * Returns OK or FAIL. | |
226 */ | |
227 int | |
7833
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
228 enc2macroman( |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
229 char_u *from, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
230 size_t fromlen, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
231 char_u *to, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
232 int *tolenp, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
233 int maxtolen, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
234 char_u *rest, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
235 int *restlenp) |
18 | 236 { |
237 CFStringRef cfstr; | |
238 CFRange r; | |
239 CFIndex l; | |
240 | |
241 *restlenp = 0; | |
242 cfstr = CFStringCreateWithBytes(NULL, from, fromlen, | |
243 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, | |
244 0); | |
245 while (cfstr == NULL && *restlenp < 3 && fromlen > 1) | |
246 { | |
247 rest[*restlenp++] = from[--fromlen]; | |
248 cfstr = CFStringCreateWithBytes(NULL, from, fromlen, | |
249 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, | |
250 0); | |
251 } | |
252 if (cfstr == NULL) | |
253 return FAIL; | |
254 | |
255 r.location = 0; | |
256 r.length = CFStringGetLength(cfstr); | |
257 if (r.length != CFStringGetBytes(cfstr, r, | |
258 kCFStringEncodingMacRoman, | |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
259 0, // no lossy conversion |
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
260 0, // not external representation (since vim |
26771
fc859aea8cec
patch 8.2.3914: various spelling mistakes in comments
Bram Moolenaar <Bram@vim.org>
parents:
22417
diff
changeset
|
261 // handles this internally) |
18 | 262 to, maxtolen, &l)) |
263 { | |
264 CFRelease(cfstr); | |
265 return FAIL; | |
266 } | |
267 CFRelease(cfstr); | |
268 *tolenp = l; | |
269 return OK; | |
270 } | |
20 | 271 |
168 | 272 /* |
273 * Initializes text converters | |
274 */ | |
275 void | |
7833
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
276 mac_conv_init(void) |
168 | 277 { |
278 TextEncoding utf8_encoding; | |
279 TextEncoding utf8_hfsplus_encoding; | |
280 TextEncoding utf8_canon_encoding; | |
281 TextEncoding utf16_encoding; | |
282 | |
283 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
284 kTextEncodingDefaultVariant, kUnicodeUTF8Format); | |
285 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
286 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format); | |
287 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
288 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format); | |
289 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
290 kTextEncodingDefaultVariant, kUnicode16BitFormat); | |
291 | |
292 if (TECCreateConverter(&gPathConverter, utf8_encoding, | |
293 utf8_hfsplus_encoding) != noErr) | |
294 gPathConverter = NULL; | |
295 | |
296 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, | |
297 utf8_canon_encoding) != noErr) | |
179 | 298 { |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
299 // On pre-10.3, Unicode normalization is not available so |
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
300 // fall back to non-normalizing converter |
179 | 301 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, |
302 utf8_encoding) != noErr) | |
303 gUTF16ToUTF8Converter = NULL; | |
304 } | |
168 | 305 } |
306 | |
307 /* | |
308 * Destroys text converters | |
309 */ | |
310 void | |
7833
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
311 mac_conv_cleanup(void) |
168 | 312 { |
313 if (gUTF16ToUTF8Converter) | |
314 { | |
315 TECDisposeConverter(gUTF16ToUTF8Converter); | |
316 gUTF16ToUTF8Converter = NULL; | |
317 } | |
318 | |
319 if (gPathConverter) | |
320 { | |
321 TECDisposeConverter(gPathConverter); | |
322 gPathConverter = NULL; | |
323 } | |
324 } | |
325 | |
326 /* | |
327 * Conversion from UTF-16 UniChars to 'encoding' | |
1621 | 328 * The function signature uses the real type of UniChar (as typedef'ed in |
329 * CFBase.h) to avoid clashes with X11 header files in the .pro file | |
168 | 330 */ |
331 char_u * | |
7833
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
332 mac_utf16_to_enc( |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
333 unsigned short *from, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
334 size_t fromLen, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
335 size_t *actualLen) |
168 | 336 { |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
337 // Following code borrows somewhat from os_mswin.c |
168 | 338 vimconv_T conv; |
339 size_t utf8_len; | |
340 char_u *utf8_str; | |
341 char_u *result = NULL; | |
342 | |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
343 // Convert to utf-8 first, works better with iconv |
168 | 344 utf8_len = 0; |
345 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len); | |
346 | |
347 if (utf8_str) | |
348 { | |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
349 // We might be called before we have p_enc set up. |
168 | 350 conv.vc_type = CONV_NONE; |
351 | |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
352 // If encoding (p_enc) is any unicode, it is actually in utf-8 (vim |
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
353 // internal unicode is always utf-8) so don't convert in such cases |
168 | 354 |
355 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0) | |
356 convert_setup(&conv, (char_u *)"utf-8", | |
357 p_enc? p_enc: (char_u *)"macroman"); | |
358 if (conv.vc_type == CONV_NONE) | |
359 { | |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
360 // p_enc is utf-8, so we're done. |
168 | 361 result = utf8_str; |
362 } | |
363 else | |
364 { | |
365 result = string_convert(&conv, utf8_str, (int *)&utf8_len); | |
366 vim_free(utf8_str); | |
367 } | |
368 | |
369 convert_setup(&conv, NULL, NULL); | |
370 | |
371 if (actualLen) | |
372 *actualLen = utf8_len; | |
373 } | |
374 else if (actualLen) | |
375 *actualLen = 0; | |
376 | |
377 return result; | |
378 } | |
379 | |
380 /* | |
381 * Conversion from 'encoding' to UTF-16 UniChars | |
1621 | 382 * The function return uses the real type of UniChar (as typedef'ed in |
383 * CFBase.h) to avoid clashes with X11 header files in the .pro file | |
168 | 384 */ |
1621 | 385 unsigned short * |
7833
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
386 mac_enc_to_utf16( |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
387 char_u *from, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
388 size_t fromLen, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
389 size_t *actualLen) |
168 | 390 { |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
391 // Following code borrows somewhat from os_mswin.c |
168 | 392 vimconv_T conv; |
393 size_t utf8_len; | |
394 char_u *utf8_str; | |
395 UniChar *result = NULL; | |
396 Boolean should_free_utf8 = FALSE; | |
397 | |
398 do | |
399 { | |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
400 // Use MacRoman by default, we might be called before we have p_enc |
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
401 // set up. Convert to utf-8 first, works better with iconv(). Does |
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
402 // nothing if 'encoding' is "utf-8". |
168 | 403 conv.vc_type = CONV_NONE; |
404 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 && | |
405 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman", | |
406 (char_u *)"utf-8") == FAIL) | |
407 break; | |
408 | |
409 if (conv.vc_type != CONV_NONE) | |
410 { | |
411 utf8_len = fromLen; | |
412 utf8_str = string_convert(&conv, from, (int *)&utf8_len); | |
413 should_free_utf8 = TRUE; | |
414 } | |
415 else | |
416 { | |
417 utf8_str = from; | |
418 utf8_len = fromLen; | |
419 } | |
420 | |
421 if (utf8_str == NULL) | |
422 break; | |
423 | |
424 convert_setup(&conv, NULL, NULL); | |
425 | |
426 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen); | |
427 | |
428 if (should_free_utf8) | |
429 vim_free(utf8_str); | |
430 return result; | |
431 } | |
432 while (0); | |
433 | |
434 if (actualLen) | |
435 *actualLen = 0; | |
436 | |
437 return result; | |
438 } | |
439 | |
440 /* | |
441 * Converts from UTF-16 UniChars to CFString | |
1621 | 442 * The void * return type is actually a CFStringRef |
168 | 443 */ |
1621 | 444 void * |
7833
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
445 mac_enc_to_cfstring( |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
446 char_u *from, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
447 size_t fromLen) |
168 | 448 { |
449 UniChar *utf16_str; | |
450 size_t utf16_len; | |
451 CFStringRef result = NULL; | |
452 | |
453 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len); | |
454 if (utf16_str) | |
455 { | |
456 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar)); | |
457 vim_free(utf16_str); | |
458 } | |
459 | |
1621 | 460 return (void *)result; |
168 | 461 } |
462 | |
463 /* | |
464 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8 | |
465 */ | |
466 char_u * | |
7833
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
467 mac_precompose_path( |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
468 char_u *decompPath, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
469 size_t decompLen, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
470 size_t *precompLen) |
168 | 471 { |
472 char_u *result = NULL; | |
473 size_t actualLen = 0; | |
474 | |
475 if (gPathConverter) | |
476 { | |
477 result = alloc(decompLen); | |
478 if (result) | |
479 { | |
480 if (TECConvertText(gPathConverter, decompPath, | |
481 decompLen, &decompLen, result, | |
482 decompLen, &actualLen) != noErr) | |
13244
ac42c4b11dbc
patch 8.0.1496: clearing a pointer takes two lines
Christian Brabandt <cb@256bit.org>
parents:
12879
diff
changeset
|
483 VIM_CLEAR(result); |
168 | 484 } |
485 } | |
486 | |
487 if (precompLen) | |
488 *precompLen = actualLen; | |
489 | |
490 return result; | |
491 } | |
492 | |
493 /* | |
494 * Converts from UTF-16 UniChars to precomposed UTF-8 | |
495 */ | |
766 | 496 static char_u * |
7833
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
497 mac_utf16_to_utf8( |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
498 UniChar *from, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
499 size_t fromLen, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
500 size_t *actualLen) |
168 | 501 { |
502 ByteCount utf8_len; | |
503 ByteCount inputRead; | |
504 char_u *result; | |
505 | |
506 if (gUTF16ToUTF8Converter) | |
507 { | |
508 result = alloc(fromLen * 6 + 1); | |
509 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from, | |
510 fromLen, &inputRead, result, | |
511 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr) | |
512 { | |
513 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead); | |
514 utf8_len += inputRead; | |
515 } | |
516 else | |
13244
ac42c4b11dbc
patch 8.0.1496: clearing a pointer takes two lines
Christian Brabandt <cb@256bit.org>
parents:
12879
diff
changeset
|
517 VIM_CLEAR(result); |
168 | 518 } |
519 else | |
520 { | |
521 result = NULL; | |
522 } | |
523 | |
524 if (actualLen) | |
525 *actualLen = result ? utf8_len : 0; | |
526 | |
527 return result; | |
528 } | |
529 | |
530 /* | |
531 * Converts from UTF-8 to UTF-16 UniChars | |
532 */ | |
766 | 533 static UniChar * |
7833
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
534 mac_utf8_to_utf16( |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
535 char_u *from, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
536 size_t fromLen, |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
537 size_t *actualLen) |
168 | 538 { |
539 CFStringRef utf8_str; | |
540 CFRange convertRange; | |
541 UniChar *result = NULL; | |
542 | |
543 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen, | |
544 kCFStringEncodingUTF8, FALSE); | |
545 | |
546 if (utf8_str == NULL) { | |
547 if (actualLen) | |
548 *actualLen = 0; | |
549 return NULL; | |
550 } | |
551 | |
552 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str)); | |
16825
ce04ebdf26b8
patch 8.1.1414: alloc() returning "char_u *" causes a lot of type casts
Bram Moolenaar <Bram@vim.org>
parents:
13244
diff
changeset
|
553 result = ALLOC_MULT(UniChar, convertRange.length); |
168 | 554 |
555 CFStringGetCharacters(utf8_str, convertRange, result); | |
556 | |
557 CFRelease(utf8_str); | |
558 | |
559 if (actualLen) | |
560 *actualLen = convertRange.length * sizeof(UniChar); | |
561 | |
562 return result; | |
563 } | |
1621 | 564 |
565 /* | |
566 * Sets LANG environment variable in Vim from Mac locale | |
567 */ | |
568 void | |
7833
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
569 mac_lang_init(void) |
c079097365f3
commit https://github.com/vim/vim/commit/055409764ca5f7978d4c399d2c440af0ce971c4f
Christian Brabandt <cb@256bit.org>
parents:
7811
diff
changeset
|
570 { |
1621 | 571 if (mch_getenv((char_u *)"LANG") == NULL) |
572 { | |
22417
68115baaf9e4
patch 8.2.1757: Mac: default locale is lacking the encoding
Bram Moolenaar <Bram@vim.org>
parents:
22379
diff
changeset
|
573 char buf[50]; |
68115baaf9e4
patch 8.2.1757: Mac: default locale is lacking the encoding
Bram Moolenaar <Bram@vim.org>
parents:
22379
diff
changeset
|
574 |
68115baaf9e4
patch 8.2.1757: Mac: default locale is lacking the encoding
Bram Moolenaar <Bram@vim.org>
parents:
22379
diff
changeset
|
575 // $LANG is not set, either because it was unset or Vim was started |
68115baaf9e4
patch 8.2.1757: Mac: default locale is lacking the encoding
Bram Moolenaar <Bram@vim.org>
parents:
22379
diff
changeset
|
576 // from the Dock. Query the system locale. |
1621 | 577 if (LocaleRefGetPartString(NULL, |
578 kLocaleLanguageMask | kLocaleLanguageVariantMask | | |
579 kLocaleRegionMask | kLocaleRegionVariantMask, | |
22417
68115baaf9e4
patch 8.2.1757: Mac: default locale is lacking the encoding
Bram Moolenaar <Bram@vim.org>
parents:
22379
diff
changeset
|
580 sizeof(buf) - 10, buf) == noErr && *buf) |
1621 | 581 { |
22417
68115baaf9e4
patch 8.2.1757: Mac: default locale is lacking the encoding
Bram Moolenaar <Bram@vim.org>
parents:
22379
diff
changeset
|
582 if (strcasestr(buf, "utf-8") == NULL) |
68115baaf9e4
patch 8.2.1757: Mac: default locale is lacking the encoding
Bram Moolenaar <Bram@vim.org>
parents:
22379
diff
changeset
|
583 strcat(buf, ".UTF-8"); |
1621 | 584 vim_setenv((char_u *)"LANG", (char_u *)buf); |
585 # ifdef HAVE_LOCALE_H | |
586 setlocale(LC_ALL, ""); | |
587 # endif | |
30310
029c59bf78f1
patch 9.0.0491: no good reason to build without the float feature
Bram Moolenaar <Bram@vim.org>
parents:
26771
diff
changeset
|
588 # if defined(LC_NUMERIC) |
22379
e1e24b1dba6e
patch 8.2.1738: Mac: str2float() recognizes comma instead of decimal point
Bram Moolenaar <Bram@vim.org>
parents:
21745
diff
changeset
|
589 // Make sure strtod() uses a decimal point, not a comma. |
e1e24b1dba6e
patch 8.2.1738: Mac: str2float() recognizes comma instead of decimal point
Bram Moolenaar <Bram@vim.org>
parents:
21745
diff
changeset
|
590 setlocale(LC_NUMERIC, "C"); |
e1e24b1dba6e
patch 8.2.1738: Mac: str2float() recognizes comma instead of decimal point
Bram Moolenaar <Bram@vim.org>
parents:
21745
diff
changeset
|
591 # endif |
1621 | 592 } |
593 } | |
594 } | |
18810
44b855153d8e
patch 8.1.2393: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16825
diff
changeset
|
595 #endif // MACOS_CONVERT |