Mercurial > vim
annotate src/os_mac_conv.c @ 6343:edfa81ea1711 v7.4.504
updated for version 7.4.504
Problem: Restriction of the MS-Windows installer that the path must end in
"Vim" prevents installing more than one version.
Solution: Remove the restriction. (Tim Lebedkov)
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Wed, 05 Nov 2014 18:18:17 +0100 |
parents | 314d9368069e |
children | 0b6c37dd858d |
rev | line source |
---|---|
18 | 1 /* vi:set ts=8 sts=4 sw=4: |
2 * | |
3 * VIM - Vi IMproved by Bram Moolenaar | |
4 * | |
5 * Do ":help uganda" in Vim to read copying and usage conditions. | |
6 * Do ":help credits" in Vim to see a list of people who contributed. | |
7 * See README.txt for an overview of the Vim source code. | |
8 */ | |
9 /* | |
10 * os_mac_conv.c: Code specifically for Mac string conversions. | |
11 * | |
12 * This code has been put in a separate file to avoid the conflicts that are | |
13 * caused by including both the X11 and Carbon header files. | |
14 */ | |
15 | |
16 #define NO_X11_INCLUDES | |
2891 | 17 #define BalloonEval int /* used in header files */ |
18 | |
18 | 19 #include "vim.h" |
2309
543ea69d037f
Add clipboard support in Mac console. (Bjorn Winckler)
Bram Moolenaar <bram@vim.org>
parents:
1621
diff
changeset
|
20 #ifndef FEAT_GUI_MAC |
543ea69d037f
Add clipboard support in Mac console. (Bjorn Winckler)
Bram Moolenaar <bram@vim.org>
parents:
1621
diff
changeset
|
21 # include <CoreServices/CoreServices.h> |
543ea69d037f
Add clipboard support in Mac console. (Bjorn Winckler)
Bram Moolenaar <bram@vim.org>
parents:
1621
diff
changeset
|
22 #endif |
543ea69d037f
Add clipboard support in Mac console. (Bjorn Winckler)
Bram Moolenaar <bram@vim.org>
parents:
1621
diff
changeset
|
23 |
18 | 24 |
766 | 25 #if defined(MACOS_CONVERT) || defined(PROTO) |
2309
543ea69d037f
Add clipboard support in Mac console. (Bjorn Winckler)
Bram Moolenaar <bram@vim.org>
parents:
1621
diff
changeset
|
26 |
766 | 27 # ifdef PROTO |
28 /* A few dummy types to be able to generate function prototypes. */ | |
29 typedef int UniChar; | |
30 typedef int *TECObjectRef; | |
31 typedef int CFStringRef; | |
32 # endif | |
33 | |
168 | 34 static char_u *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen)); |
35 static UniChar *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen)); | |
36 | |
37 /* Converter for composing decomposed HFS+ file paths */ | |
38 static TECObjectRef gPathConverter; | |
39 /* Converter used by mac_utf16_to_utf8 */ | |
40 static TECObjectRef gUTF16ToUTF8Converter; | |
41 | |
18 | 42 /* |
43 * A Mac version of string_convert_ext() for special cases. | |
44 */ | |
45 char_u * | |
46 mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp) | |
47 char_u *ptr; | |
48 int len; | |
49 int *lenp; | |
50 int fail_on_error; | |
51 int from_enc; | |
52 int to_enc; | |
53 int *unconvlenp; | |
54 { | |
55 char_u *retval, *d; | |
56 CFStringRef cfstr; | |
57 int buflen, in, out, l, i; | |
58 CFStringEncoding from; | |
59 CFStringEncoding to; | |
60 | |
61 switch (from_enc) | |
62 { | |
63 case 'l': from = kCFStringEncodingISOLatin1; break; | |
64 case 'm': from = kCFStringEncodingMacRoman; break; | |
65 case 'u': from = kCFStringEncodingUTF8; break; | |
66 default: return NULL; | |
67 } | |
68 switch (to_enc) | |
69 { | |
70 case 'l': to = kCFStringEncodingISOLatin1; break; | |
71 case 'm': to = kCFStringEncodingMacRoman; break; | |
72 case 'u': to = kCFStringEncodingUTF8; break; | |
73 default: return NULL; | |
74 } | |
75 | |
76 if (unconvlenp != NULL) | |
77 *unconvlenp = 0; | |
78 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); | |
79 | |
3143 | 80 if (cfstr == NULL) |
168 | 81 fprintf(stderr, "Encoding failed\n"); |
18 | 82 /* When conversion failed, try excluding bytes from the end, helps when |
83 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid | |
1212 | 84 * looping a long time when there really is something unconvertible. */ |
18 | 85 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6) |
86 { | |
87 --len; | |
88 ++*unconvlenp; | |
89 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); | |
90 } | |
91 if (cfstr == NULL) | |
92 return NULL; | |
168 | 93 |
18 | 94 if (to == kCFStringEncodingUTF8) |
95 buflen = len * 6 + 1; | |
96 else | |
97 buflen = len + 1; | |
98 retval = alloc(buflen); | |
99 if (retval == NULL) | |
100 { | |
101 CFRelease(cfstr); | |
102 return NULL; | |
103 } | |
168 | 104 |
105 #if 0 | |
106 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr)); | |
107 /* Determine output buffer size */ | |
108 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen); | |
109 retval = (buflen > 0) ? alloc(buflen) : NULL; | |
110 if (retval == NULL) { | |
111 CFRelease(cfstr); | |
112 return NULL; | |
113 } | |
114 | |
115 if (lenp) | |
116 *lenp = buflen / sizeof(char_u); | |
117 | |
118 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL)) | |
119 #endif | |
501 | 120 if (!CFStringGetCString(cfstr, (char *)retval, buflen, to)) |
18 | 121 { |
122 CFRelease(cfstr); | |
123 if (fail_on_error) | |
124 { | |
125 vim_free(retval); | |
126 return NULL; | |
127 } | |
128 | |
168 | 129 fprintf(stderr, "Trying char-by-char conversion...\n"); |
18 | 130 /* conversion failed for the whole string, but maybe it will work |
131 * for each character */ | |
132 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;) | |
133 { | |
134 if (from == kCFStringEncodingUTF8) | |
474 | 135 l = utf_ptr2len(ptr + in); |
18 | 136 else |
137 l = 1; | |
138 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0); | |
139 if (cfstr == NULL) | |
140 { | |
141 *d++ = '?'; | |
142 out++; | |
143 } | |
144 else | |
145 { | |
501 | 146 if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to)) |
18 | 147 { |
148 *d++ = '?'; | |
149 out++; | |
150 } | |
151 else | |
152 { | |
501 | 153 i = STRLEN(d); |
18 | 154 d += i; |
155 out += i; | |
156 } | |
157 CFRelease(cfstr); | |
158 } | |
159 in += l; | |
160 } | |
161 *d = NUL; | |
162 if (lenp != NULL) | |
163 *lenp = out; | |
164 return retval; | |
165 } | |
166 CFRelease(cfstr); | |
167 if (lenp != NULL) | |
501 | 168 *lenp = STRLEN(retval); |
168 | 169 |
18 | 170 return retval; |
171 } | |
172 | |
173 /* | |
174 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using | |
175 * standard Carbon framework. | |
176 * Input: "ptr[*sizep]". | |
177 * "real_size" is the size of the buffer that "ptr" points to. | |
178 * output is in-place, "sizep" is adjusted. | |
179 * Returns OK or FAIL. | |
180 */ | |
181 int | |
182 macroman2enc(ptr, sizep, real_size) | |
183 char_u *ptr; | |
184 long *sizep; | |
185 long real_size; | |
186 { | |
187 CFStringRef cfstr; | |
188 CFRange r; | |
189 CFIndex len = *sizep; | |
190 | |
191 /* MacRoman is an 8-bit encoding, no need to move bytes to | |
192 * conv_rest[]. */ | |
193 cfstr = CFStringCreateWithBytes(NULL, ptr, len, | |
194 kCFStringEncodingMacRoman, 0); | |
195 /* | |
196 * If there is a conversion error, try using another | |
197 * conversion. | |
198 */ | |
199 if (cfstr == NULL) | |
200 return FAIL; | |
201 | |
202 r.location = 0; | |
203 r.length = CFStringGetLength(cfstr); | |
204 if (r.length != CFStringGetBytes(cfstr, r, | |
205 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, | |
206 0, /* no lossy conversion */ | |
207 0, /* not external representation */ | |
208 ptr + *sizep, real_size - *sizep, &len)) | |
209 { | |
210 CFRelease(cfstr); | |
211 return FAIL; | |
212 } | |
213 CFRelease(cfstr); | |
214 mch_memmove(ptr, ptr + *sizep, len); | |
215 *sizep = len; | |
216 | |
217 return OK; | |
218 } | |
219 | |
220 /* | |
221 * Conversion from UTF-8 or latin1 to MacRoman. | |
222 * Input: "from[fromlen]" | |
223 * Output: "to[maxtolen]" length in "*tolenp" | |
224 * Unconverted rest in rest[*restlenp]. | |
225 * Returns OK or FAIL. | |
226 */ | |
227 int | |
228 enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp) | |
229 char_u *from; | |
230 size_t fromlen; | |
231 char_u *to; | |
232 int *tolenp; | |
233 int maxtolen; | |
234 char_u *rest; | |
235 int *restlenp; | |
236 { | |
237 CFStringRef cfstr; | |
238 CFRange r; | |
239 CFIndex l; | |
240 | |
241 *restlenp = 0; | |
242 cfstr = CFStringCreateWithBytes(NULL, from, fromlen, | |
243 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, | |
244 0); | |
245 while (cfstr == NULL && *restlenp < 3 && fromlen > 1) | |
246 { | |
247 rest[*restlenp++] = from[--fromlen]; | |
248 cfstr = CFStringCreateWithBytes(NULL, from, fromlen, | |
249 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, | |
250 0); | |
251 } | |
252 if (cfstr == NULL) | |
253 return FAIL; | |
254 | |
255 r.location = 0; | |
256 r.length = CFStringGetLength(cfstr); | |
257 if (r.length != CFStringGetBytes(cfstr, r, | |
258 kCFStringEncodingMacRoman, | |
259 0, /* no lossy conversion */ | |
260 0, /* not external representation (since vim | |
261 * handles this internally */ | |
262 to, maxtolen, &l)) | |
263 { | |
264 CFRelease(cfstr); | |
265 return FAIL; | |
266 } | |
267 CFRelease(cfstr); | |
268 *tolenp = l; | |
269 return OK; | |
270 } | |
20 | 271 |
168 | 272 /* |
273 * Initializes text converters | |
274 */ | |
275 void | |
276 mac_conv_init() | |
277 { | |
278 TextEncoding utf8_encoding; | |
279 TextEncoding utf8_hfsplus_encoding; | |
280 TextEncoding utf8_canon_encoding; | |
281 TextEncoding utf16_encoding; | |
282 | |
283 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
284 kTextEncodingDefaultVariant, kUnicodeUTF8Format); | |
285 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
286 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format); | |
287 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
288 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format); | |
289 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
290 kTextEncodingDefaultVariant, kUnicode16BitFormat); | |
291 | |
292 if (TECCreateConverter(&gPathConverter, utf8_encoding, | |
293 utf8_hfsplus_encoding) != noErr) | |
294 gPathConverter = NULL; | |
295 | |
296 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, | |
297 utf8_canon_encoding) != noErr) | |
179 | 298 { |
299 /* On pre-10.3, Unicode normalization is not available so | |
300 * fall back to non-normalizing converter */ | |
301 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, | |
302 utf8_encoding) != noErr) | |
303 gUTF16ToUTF8Converter = NULL; | |
304 } | |
168 | 305 } |
306 | |
307 /* | |
308 * Destroys text converters | |
309 */ | |
310 void | |
311 mac_conv_cleanup() | |
312 { | |
313 if (gUTF16ToUTF8Converter) | |
314 { | |
315 TECDisposeConverter(gUTF16ToUTF8Converter); | |
316 gUTF16ToUTF8Converter = NULL; | |
317 } | |
318 | |
319 if (gPathConverter) | |
320 { | |
321 TECDisposeConverter(gPathConverter); | |
322 gPathConverter = NULL; | |
323 } | |
324 } | |
325 | |
326 /* | |
327 * Conversion from UTF-16 UniChars to 'encoding' | |
1621 | 328 * The function signature uses the real type of UniChar (as typedef'ed in |
329 * CFBase.h) to avoid clashes with X11 header files in the .pro file | |
168 | 330 */ |
331 char_u * | |
332 mac_utf16_to_enc(from, fromLen, actualLen) | |
1621 | 333 unsigned short *from; |
168 | 334 size_t fromLen; |
335 size_t *actualLen; | |
336 { | |
337 /* Following code borrows somewhat from os_mswin.c */ | |
338 vimconv_T conv; | |
339 size_t utf8_len; | |
340 char_u *utf8_str; | |
341 char_u *result = NULL; | |
342 | |
343 /* Convert to utf-8 first, works better with iconv */ | |
344 utf8_len = 0; | |
345 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len); | |
346 | |
347 if (utf8_str) | |
348 { | |
349 /* We might be called before we have p_enc set up. */ | |
350 conv.vc_type = CONV_NONE; | |
351 | |
352 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim | |
353 * internal unicode is always utf-8) so don't convert in such cases */ | |
354 | |
355 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0) | |
356 convert_setup(&conv, (char_u *)"utf-8", | |
357 p_enc? p_enc: (char_u *)"macroman"); | |
358 if (conv.vc_type == CONV_NONE) | |
359 { | |
360 /* p_enc is utf-8, so we're done. */ | |
361 result = utf8_str; | |
362 } | |
363 else | |
364 { | |
365 result = string_convert(&conv, utf8_str, (int *)&utf8_len); | |
366 vim_free(utf8_str); | |
367 } | |
368 | |
369 convert_setup(&conv, NULL, NULL); | |
370 | |
371 if (actualLen) | |
372 *actualLen = utf8_len; | |
373 } | |
374 else if (actualLen) | |
375 *actualLen = 0; | |
376 | |
377 return result; | |
378 } | |
379 | |
380 /* | |
381 * Conversion from 'encoding' to UTF-16 UniChars | |
1621 | 382 * The function return uses the real type of UniChar (as typedef'ed in |
383 * CFBase.h) to avoid clashes with X11 header files in the .pro file | |
168 | 384 */ |
1621 | 385 unsigned short * |
168 | 386 mac_enc_to_utf16(from, fromLen, actualLen) |
387 char_u *from; | |
388 size_t fromLen; | |
389 size_t *actualLen; | |
390 { | |
391 /* Following code borrows somewhat from os_mswin.c */ | |
392 vimconv_T conv; | |
393 size_t utf8_len; | |
394 char_u *utf8_str; | |
395 UniChar *result = NULL; | |
396 Boolean should_free_utf8 = FALSE; | |
397 | |
398 do | |
399 { | |
400 /* Use MacRoman by default, we might be called before we have p_enc | |
401 * set up. Convert to utf-8 first, works better with iconv(). Does | |
402 * nothing if 'encoding' is "utf-8". */ | |
403 conv.vc_type = CONV_NONE; | |
404 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 && | |
405 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman", | |
406 (char_u *)"utf-8") == FAIL) | |
407 break; | |
408 | |
409 if (conv.vc_type != CONV_NONE) | |
410 { | |
411 utf8_len = fromLen; | |
412 utf8_str = string_convert(&conv, from, (int *)&utf8_len); | |
413 should_free_utf8 = TRUE; | |
414 } | |
415 else | |
416 { | |
417 utf8_str = from; | |
418 utf8_len = fromLen; | |
419 } | |
420 | |
421 if (utf8_str == NULL) | |
422 break; | |
423 | |
424 convert_setup(&conv, NULL, NULL); | |
425 | |
426 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen); | |
427 | |
428 if (should_free_utf8) | |
429 vim_free(utf8_str); | |
430 return result; | |
431 } | |
432 while (0); | |
433 | |
434 if (actualLen) | |
435 *actualLen = 0; | |
436 | |
437 return result; | |
438 } | |
439 | |
440 /* | |
441 * Converts from UTF-16 UniChars to CFString | |
1621 | 442 * The void * return type is actually a CFStringRef |
168 | 443 */ |
1621 | 444 void * |
168 | 445 mac_enc_to_cfstring(from, fromLen) |
446 char_u *from; | |
447 size_t fromLen; | |
448 { | |
449 UniChar *utf16_str; | |
450 size_t utf16_len; | |
451 CFStringRef result = NULL; | |
452 | |
453 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len); | |
454 if (utf16_str) | |
455 { | |
456 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar)); | |
457 vim_free(utf16_str); | |
458 } | |
459 | |
1621 | 460 return (void *)result; |
168 | 461 } |
462 | |
463 /* | |
464 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8 | |
465 */ | |
466 char_u * | |
467 mac_precompose_path(decompPath, decompLen, precompLen) | |
468 char_u *decompPath; | |
469 size_t decompLen; | |
470 size_t *precompLen; | |
471 { | |
472 char_u *result = NULL; | |
473 size_t actualLen = 0; | |
474 | |
475 if (gPathConverter) | |
476 { | |
477 result = alloc(decompLen); | |
478 if (result) | |
479 { | |
480 if (TECConvertText(gPathConverter, decompPath, | |
481 decompLen, &decompLen, result, | |
482 decompLen, &actualLen) != noErr) | |
483 { | |
484 vim_free(result); | |
485 result = NULL; | |
486 } | |
487 } | |
488 } | |
489 | |
490 if (precompLen) | |
491 *precompLen = actualLen; | |
492 | |
493 return result; | |
494 } | |
495 | |
496 /* | |
497 * Converts from UTF-16 UniChars to precomposed UTF-8 | |
498 */ | |
766 | 499 static char_u * |
168 | 500 mac_utf16_to_utf8(from, fromLen, actualLen) |
501 UniChar *from; | |
502 size_t fromLen; | |
503 size_t *actualLen; | |
504 { | |
505 ByteCount utf8_len; | |
506 ByteCount inputRead; | |
507 char_u *result; | |
508 | |
509 if (gUTF16ToUTF8Converter) | |
510 { | |
511 result = alloc(fromLen * 6 + 1); | |
512 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from, | |
513 fromLen, &inputRead, result, | |
514 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr) | |
515 { | |
516 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead); | |
517 utf8_len += inputRead; | |
518 } | |
519 else | |
520 { | |
521 vim_free(result); | |
522 result = NULL; | |
523 } | |
524 } | |
525 else | |
526 { | |
527 result = NULL; | |
528 } | |
529 | |
530 if (actualLen) | |
531 *actualLen = result ? utf8_len : 0; | |
532 | |
533 return result; | |
534 } | |
535 | |
536 /* | |
537 * Converts from UTF-8 to UTF-16 UniChars | |
538 */ | |
766 | 539 static UniChar * |
168 | 540 mac_utf8_to_utf16(from, fromLen, actualLen) |
541 char_u *from; | |
542 size_t fromLen; | |
543 size_t *actualLen; | |
544 { | |
545 CFStringRef utf8_str; | |
546 CFRange convertRange; | |
547 UniChar *result = NULL; | |
548 | |
549 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen, | |
550 kCFStringEncodingUTF8, FALSE); | |
551 | |
552 if (utf8_str == NULL) { | |
553 if (actualLen) | |
554 *actualLen = 0; | |
555 return NULL; | |
556 } | |
557 | |
558 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str)); | |
559 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar)); | |
560 | |
561 CFStringGetCharacters(utf8_str, convertRange, result); | |
562 | |
563 CFRelease(utf8_str); | |
564 | |
565 if (actualLen) | |
566 *actualLen = convertRange.length * sizeof(UniChar); | |
567 | |
568 return result; | |
569 } | |
1621 | 570 |
571 /* | |
572 * Sets LANG environment variable in Vim from Mac locale | |
573 */ | |
574 void | |
575 mac_lang_init() { | |
576 if (mch_getenv((char_u *)"LANG") == NULL) | |
577 { | |
578 char buf[20]; | |
579 if (LocaleRefGetPartString(NULL, | |
580 kLocaleLanguageMask | kLocaleLanguageVariantMask | | |
581 kLocaleRegionMask | kLocaleRegionVariantMask, | |
582 sizeof buf, buf) == noErr && *buf) | |
583 { | |
584 vim_setenv((char_u *)"LANG", (char_u *)buf); | |
585 # ifdef HAVE_LOCALE_H | |
586 setlocale(LC_ALL, ""); | |
587 # endif | |
588 } | |
589 } | |
590 } | |
766 | 591 #endif /* MACOS_CONVERT */ |