Mercurial > vim
annotate src/os_mac_conv.c @ 2431:7b764999f9b9 vim73
Update for Lua interface. (Luis Carvalho)
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Wed, 28 Jul 2010 22:46:08 +0200 |
parents | 543ea69d037f |
children | acda456c788a |
rev | line source |
---|---|
18 | 1 /* vi:set ts=8 sts=4 sw=4: |
2 * | |
3 * VIM - Vi IMproved by Bram Moolenaar | |
4 * | |
5 * Do ":help uganda" in Vim to read copying and usage conditions. | |
6 * Do ":help credits" in Vim to see a list of people who contributed. | |
7 * See README.txt for an overview of the Vim source code. | |
8 */ | |
9 /* | |
10 * os_mac_conv.c: Code specifically for Mac string conversions. | |
11 * | |
12 * This code has been put in a separate file to avoid the conflicts that are | |
13 * caused by including both the X11 and Carbon header files. | |
14 */ | |
15 | |
16 #define NO_X11_INCLUDES | |
17 #include "vim.h" | |
2309
543ea69d037f
Add clipboard support in Mac console. (Bjorn Winckler)
Bram Moolenaar <bram@vim.org>
parents:
1621
diff
changeset
|
18 #ifndef FEAT_GUI_MAC |
543ea69d037f
Add clipboard support in Mac console. (Bjorn Winckler)
Bram Moolenaar <bram@vim.org>
parents:
1621
diff
changeset
|
19 # include <CoreServices/CoreServices.h> |
543ea69d037f
Add clipboard support in Mac console. (Bjorn Winckler)
Bram Moolenaar <bram@vim.org>
parents:
1621
diff
changeset
|
20 #endif |
543ea69d037f
Add clipboard support in Mac console. (Bjorn Winckler)
Bram Moolenaar <bram@vim.org>
parents:
1621
diff
changeset
|
21 |
18 | 22 |
766 | 23 #if defined(MACOS_CONVERT) || defined(PROTO) |
2309
543ea69d037f
Add clipboard support in Mac console. (Bjorn Winckler)
Bram Moolenaar <bram@vim.org>
parents:
1621
diff
changeset
|
24 |
766 | 25 # ifdef PROTO |
26 /* A few dummy types to be able to generate function prototypes. */ | |
27 typedef int UniChar; | |
28 typedef int *TECObjectRef; | |
29 typedef int CFStringRef; | |
30 # endif | |
31 | |
168 | 32 static char_u *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen)); |
33 static UniChar *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen)); | |
34 | |
35 /* Converter for composing decomposed HFS+ file paths (UTF-8 to the HFS+ composed UTF-8 variant). Created by mac_conv_init(), released by mac_conv_cleanup(); NULL when it could not be created. */ | |
36 static TECObjectRef gPathConverter; | |
37 /* Converter used by mac_utf16_to_utf8 (UTF-16 to UTF-8). Created by mac_conv_init(), released by mac_conv_cleanup(); NULL when it could not be created. */ | |
38 static TECObjectRef gUTF16ToUTF8Converter; | |
39 | |
18 | 40 /* |
41 * A Mac version of string_convert_ext() for special cases. | |
42 */ | |
43 char_u * | |
44 mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp) | |
45 char_u *ptr; | |
46 int len; | |
47 int *lenp; | |
48 int fail_on_error; | |
49 int from_enc; | |
50 int to_enc; | |
51 int *unconvlenp; | |
52 { | |
53 char_u *retval, *d; | |
54 CFStringRef cfstr; | |
55 int buflen, in, out, l, i; | |
56 CFStringEncoding from; | |
57 CFStringEncoding to; | |
58 | |
59 switch (from_enc) | |
60 { | |
61 case 'l': from = kCFStringEncodingISOLatin1; break; | |
62 case 'm': from = kCFStringEncodingMacRoman; break; | |
63 case 'u': from = kCFStringEncodingUTF8; break; | |
64 default: return NULL; | |
65 } | |
66 switch (to_enc) | |
67 { | |
68 case 'l': to = kCFStringEncodingISOLatin1; break; | |
69 case 'm': to = kCFStringEncodingMacRoman; break; | |
70 case 'u': to = kCFStringEncodingUTF8; break; | |
71 default: return NULL; | |
72 } | |
73 | |
74 if (unconvlenp != NULL) | |
75 *unconvlenp = 0; | |
76 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); | |
77 | |
168 | 78 if(cfstr == NULL) |
79 fprintf(stderr, "Encoding failed\n"); | |
18 | 80 /* When conversion failed, try excluding bytes from the end, helps when |
81 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid | |
1212 | 82 * looping a long time when there really is something unconvertible. */ |
18 | 83 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6) |
84 { | |
85 --len; | |
86 ++*unconvlenp; | |
87 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); | |
88 } | |
89 if (cfstr == NULL) | |
90 return NULL; | |
168 | 91 |
18 | 92 if (to == kCFStringEncodingUTF8) |
93 buflen = len * 6 + 1; | |
94 else | |
95 buflen = len + 1; | |
96 retval = alloc(buflen); | |
97 if (retval == NULL) | |
98 { | |
99 CFRelease(cfstr); | |
100 return NULL; | |
101 } | |
168 | 102 |
103 #if 0 | |
104 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr)); | |
105 /* Determine output buffer size */ | |
106 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen); | |
107 retval = (buflen > 0) ? alloc(buflen) : NULL; | |
108 if (retval == NULL) { | |
109 CFRelease(cfstr); | |
110 return NULL; | |
111 } | |
112 | |
113 if (lenp) | |
114 *lenp = buflen / sizeof(char_u); | |
115 | |
116 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL)) | |
117 #endif | |
501 | 118 if (!CFStringGetCString(cfstr, (char *)retval, buflen, to)) |
18 | 119 { |
120 CFRelease(cfstr); | |
121 if (fail_on_error) | |
122 { | |
123 vim_free(retval); | |
124 return NULL; | |
125 } | |
126 | |
168 | 127 fprintf(stderr, "Trying char-by-char conversion...\n"); |
18 | 128 /* conversion failed for the whole string, but maybe it will work |
129 * for each character */ | |
130 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;) | |
131 { | |
132 if (from == kCFStringEncodingUTF8) | |
474 | 133 l = utf_ptr2len(ptr + in); |
18 | 134 else |
135 l = 1; | |
136 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0); | |
137 if (cfstr == NULL) | |
138 { | |
139 *d++ = '?'; | |
140 out++; | |
141 } | |
142 else | |
143 { | |
501 | 144 if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to)) |
18 | 145 { |
146 *d++ = '?'; | |
147 out++; | |
148 } | |
149 else | |
150 { | |
501 | 151 i = STRLEN(d); |
18 | 152 d += i; |
153 out += i; | |
154 } | |
155 CFRelease(cfstr); | |
156 } | |
157 in += l; | |
158 } | |
159 *d = NUL; | |
160 if (lenp != NULL) | |
161 *lenp = out; | |
162 return retval; | |
163 } | |
164 CFRelease(cfstr); | |
165 if (lenp != NULL) | |
501 | 166 *lenp = STRLEN(retval); |
168 | 167 |
18 | 168 return retval; |
169 } | |
170 | |
171 /* | |
172 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using | |
173 * standard Carbon framework. | |
174 * Input: "ptr[*sizep]". | |
175 * "real_size" is the size of the buffer that "ptr" points to. | |
176 * output is in-place, "sizep" is adjusted. | |
177 * Returns OK or FAIL. | |
178 */ | |
179 int | |
180 macroman2enc(ptr, sizep, real_size) | |
181 char_u *ptr; | |
182 long *sizep; | |
183 long real_size; | |
184 { | |
185 CFStringRef cfstr; | |
186 CFRange r; | |
187 CFIndex len = *sizep; | |
188 | |
189 /* MacRoman is an 8-bit encoding, no need to move bytes to | |
190 * conv_rest[]. */ | |
191 cfstr = CFStringCreateWithBytes(NULL, ptr, len, | |
192 kCFStringEncodingMacRoman, 0); | |
193 /* | |
194 * If there is a conversion error, try using another | |
195 * conversion. | |
196 */ | |
197 if (cfstr == NULL) | |
198 return FAIL; | |
199 | |
200 r.location = 0; | |
201 r.length = CFStringGetLength(cfstr); | |
202 if (r.length != CFStringGetBytes(cfstr, r, | |
203 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, | |
204 0, /* no lossy conversion */ | |
205 0, /* not external representation */ | |
206 ptr + *sizep, real_size - *sizep, &len)) | |
207 { | |
208 CFRelease(cfstr); | |
209 return FAIL; | |
210 } | |
211 CFRelease(cfstr); | |
212 mch_memmove(ptr, ptr + *sizep, len); | |
213 *sizep = len; | |
214 | |
215 return OK; | |
216 } | |
217 | |
218 /* | |
219 * Conversion from UTF-8 or latin1 to MacRoman. | |
220 * Input: "from[fromlen]" | |
221 * Output: "to[maxtolen]" length in "*tolenp" | |
222 * Unconverted rest in rest[*restlenp]. | |
223 * Returns OK or FAIL. | |
224 */ | |
225 int | |
226 enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp) | |
227 char_u *from; | |
228 size_t fromlen; | |
229 char_u *to; | |
230 int *tolenp; | |
231 int maxtolen; | |
232 char_u *rest; | |
233 int *restlenp; | |
234 { | |
235 CFStringRef cfstr; | |
236 CFRange r; | |
237 CFIndex l; | |
238 | |
239 *restlenp = 0; | |
240 cfstr = CFStringCreateWithBytes(NULL, from, fromlen, | |
241 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, | |
242 0); | |
243 while (cfstr == NULL && *restlenp < 3 && fromlen > 1) | |
244 { | |
245 rest[*restlenp++] = from[--fromlen]; | |
246 cfstr = CFStringCreateWithBytes(NULL, from, fromlen, | |
247 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, | |
248 0); | |
249 } | |
250 if (cfstr == NULL) | |
251 return FAIL; | |
252 | |
253 r.location = 0; | |
254 r.length = CFStringGetLength(cfstr); | |
255 if (r.length != CFStringGetBytes(cfstr, r, | |
256 kCFStringEncodingMacRoman, | |
257 0, /* no lossy conversion */ | |
258 0, /* not external representation (since vim | |
259 * handles this internally */ | |
260 to, maxtolen, &l)) | |
261 { | |
262 CFRelease(cfstr); | |
263 return FAIL; | |
264 } | |
265 CFRelease(cfstr); | |
266 *tolenp = l; | |
267 return OK; | |
268 } | |
20 | 269 |
168 | 270 /* |
271 * Initializes text converters | |
272 */ | |
273 void | |
274 mac_conv_init() | |
275 { | |
276 TextEncoding utf8_encoding; | |
277 TextEncoding utf8_hfsplus_encoding; | |
278 TextEncoding utf8_canon_encoding; | |
279 TextEncoding utf16_encoding; | |
280 | |
281 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
282 kTextEncodingDefaultVariant, kUnicodeUTF8Format); | |
283 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
284 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format); | |
285 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
286 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format); | |
287 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
288 kTextEncodingDefaultVariant, kUnicode16BitFormat); | |
289 | |
290 if (TECCreateConverter(&gPathConverter, utf8_encoding, | |
291 utf8_hfsplus_encoding) != noErr) | |
292 gPathConverter = NULL; | |
293 | |
294 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, | |
295 utf8_canon_encoding) != noErr) | |
179 | 296 { |
297 /* On pre-10.3, Unicode normalization is not available so | |
298 * fall back to non-normalizing converter */ | |
299 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, | |
300 utf8_encoding) != noErr) | |
301 gUTF16ToUTF8Converter = NULL; | |
302 } | |
168 | 303 } |
304 | |
305 /* | |
306 * Destroys text converters | |
307 */ | |
308 void | |
309 mac_conv_cleanup() | |
310 { | |
311 if (gUTF16ToUTF8Converter) | |
312 { | |
313 TECDisposeConverter(gUTF16ToUTF8Converter); | |
314 gUTF16ToUTF8Converter = NULL; | |
315 } | |
316 | |
317 if (gPathConverter) | |
318 { | |
319 TECDisposeConverter(gPathConverter); | |
320 gPathConverter = NULL; | |
321 } | |
322 } | |
323 | |
324 /* | |
325 * Conversion from UTF-16 UniChars to 'encoding' | |
1621 | 326 * The function signature uses the real type of UniChar (as typedef'ed in |
327 * CFBase.h) to avoid clashes with X11 header files in the .pro file | |
168 | 328 */ |
329 char_u * | |
330 mac_utf16_to_enc(from, fromLen, actualLen) | |
1621 | 331 unsigned short *from; |
168 | 332 size_t fromLen; |
333 size_t *actualLen; | |
334 { | |
335 /* Following code borrows somewhat from os_mswin.c */ | |
336 vimconv_T conv; | |
337 size_t utf8_len; | |
338 char_u *utf8_str; | |
339 char_u *result = NULL; | |
340 | |
341 /* Convert to utf-8 first, works better with iconv */ | |
342 utf8_len = 0; | |
343 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len); | |
344 | |
345 if (utf8_str) | |
346 { | |
347 /* We might be called before we have p_enc set up. */ | |
348 conv.vc_type = CONV_NONE; | |
349 | |
350 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim | |
351 * internal unicode is always utf-8) so don't convert in such cases */ | |
352 | |
353 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0) | |
354 convert_setup(&conv, (char_u *)"utf-8", | |
355 p_enc? p_enc: (char_u *)"macroman"); | |
356 if (conv.vc_type == CONV_NONE) | |
357 { | |
358 /* p_enc is utf-8, so we're done. */ | |
359 result = utf8_str; | |
360 } | |
361 else | |
362 { | |
363 result = string_convert(&conv, utf8_str, (int *)&utf8_len); | |
364 vim_free(utf8_str); | |
365 } | |
366 | |
367 convert_setup(&conv, NULL, NULL); | |
368 | |
369 if (actualLen) | |
370 *actualLen = utf8_len; | |
371 } | |
372 else if (actualLen) | |
373 *actualLen = 0; | |
374 | |
375 return result; | |
376 } | |
377 | |
378 /* | |
379 * Conversion from 'encoding' to UTF-16 UniChars | |
1621 | 380 * The function return uses the real type of UniChar (as typedef'ed in |
381 * CFBase.h) to avoid clashes with X11 header files in the .pro file | |
168 | 382 */ |
1621 | 383 unsigned short * |
168 | 384 mac_enc_to_utf16(from, fromLen, actualLen) |
385 char_u *from; | |
386 size_t fromLen; | |
387 size_t *actualLen; | |
388 { | |
389 /* Following code borrows somewhat from os_mswin.c */ | |
390 vimconv_T conv; | |
391 size_t utf8_len; | |
392 char_u *utf8_str; | |
393 UniChar *result = NULL; | |
394 Boolean should_free_utf8 = FALSE; | |
395 | |
396 do | |
397 { | |
398 /* Use MacRoman by default, we might be called before we have p_enc | |
399 * set up. Convert to utf-8 first, works better with iconv(). Does | |
400 * nothing if 'encoding' is "utf-8". */ | |
401 conv.vc_type = CONV_NONE; | |
402 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 && | |
403 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman", | |
404 (char_u *)"utf-8") == FAIL) | |
405 break; | |
406 | |
407 if (conv.vc_type != CONV_NONE) | |
408 { | |
409 utf8_len = fromLen; | |
410 utf8_str = string_convert(&conv, from, (int *)&utf8_len); | |
411 should_free_utf8 = TRUE; | |
412 } | |
413 else | |
414 { | |
415 utf8_str = from; | |
416 utf8_len = fromLen; | |
417 } | |
418 | |
419 if (utf8_str == NULL) | |
420 break; | |
421 | |
422 convert_setup(&conv, NULL, NULL); | |
423 | |
424 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen); | |
425 | |
426 if (should_free_utf8) | |
427 vim_free(utf8_str); | |
428 return result; | |
429 } | |
430 while (0); | |
431 | |
432 if (actualLen) | |
433 *actualLen = 0; | |
434 | |
435 return result; | |
436 } | |
437 | |
438 /* | |
439 * Converts from UTF-16 UniChars to CFString | |
1621 | 440 * The void * return type is actually a CFStringRef |
168 | 441 */ |
1621 | 442 void * |
168 | 443 mac_enc_to_cfstring(from, fromLen) |
444 char_u *from; | |
445 size_t fromLen; | |
446 { | |
447 UniChar *utf16_str; | |
448 size_t utf16_len; | |
449 CFStringRef result = NULL; | |
450 | |
451 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len); | |
452 if (utf16_str) | |
453 { | |
454 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar)); | |
455 vim_free(utf16_str); | |
456 } | |
457 | |
1621 | 458 return (void *)result; |
168 | 459 } |
460 | |
461 /* | |
462 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8 | |
463 */ | |
464 char_u * | |
465 mac_precompose_path(decompPath, decompLen, precompLen) | |
466 char_u *decompPath; | |
467 size_t decompLen; | |
468 size_t *precompLen; | |
469 { | |
470 char_u *result = NULL; | |
471 size_t actualLen = 0; | |
472 | |
473 if (gPathConverter) | |
474 { | |
475 result = alloc(decompLen); | |
476 if (result) | |
477 { | |
478 if (TECConvertText(gPathConverter, decompPath, | |
479 decompLen, &decompLen, result, | |
480 decompLen, &actualLen) != noErr) | |
481 { | |
482 vim_free(result); | |
483 result = NULL; | |
484 } | |
485 } | |
486 } | |
487 | |
488 if (precompLen) | |
489 *precompLen = actualLen; | |
490 | |
491 return result; | |
492 } | |
493 | |
494 /* | |
495 * Converts from UTF-16 UniChars to precomposed UTF-8 | |
496 */ | |
766 | 497 static char_u * |
168 | 498 mac_utf16_to_utf8(from, fromLen, actualLen) |
499 UniChar *from; | |
500 size_t fromLen; | |
501 size_t *actualLen; | |
502 { | |
503 ByteCount utf8_len; | |
504 ByteCount inputRead; | |
505 char_u *result; | |
506 | |
507 if (gUTF16ToUTF8Converter) | |
508 { | |
509 result = alloc(fromLen * 6 + 1); | |
510 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from, | |
511 fromLen, &inputRead, result, | |
512 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr) | |
513 { | |
514 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead); | |
515 utf8_len += inputRead; | |
516 } | |
517 else | |
518 { | |
519 vim_free(result); | |
520 result = NULL; | |
521 } | |
522 } | |
523 else | |
524 { | |
525 result = NULL; | |
526 } | |
527 | |
528 if (actualLen) | |
529 *actualLen = result ? utf8_len : 0; | |
530 | |
531 return result; | |
532 } | |
533 | |
534 /* | |
535 * Converts from UTF-8 to UTF-16 UniChars | |
536 */ | |
766 | 537 static UniChar * |
168 | 538 mac_utf8_to_utf16(from, fromLen, actualLen) |
539 char_u *from; | |
540 size_t fromLen; | |
541 size_t *actualLen; | |
542 { | |
543 CFStringRef utf8_str; | |
544 CFRange convertRange; | |
545 UniChar *result = NULL; | |
546 | |
547 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen, | |
548 kCFStringEncodingUTF8, FALSE); | |
549 | |
550 if (utf8_str == NULL) { | |
551 if (actualLen) | |
552 *actualLen = 0; | |
553 return NULL; | |
554 } | |
555 | |
556 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str)); | |
557 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar)); | |
558 | |
559 CFStringGetCharacters(utf8_str, convertRange, result); | |
560 | |
561 CFRelease(utf8_str); | |
562 | |
563 if (actualLen) | |
564 *actualLen = convertRange.length * sizeof(UniChar); | |
565 | |
566 return result; | |
567 } | |
1621 | 568 |
569 /* | |
570 * Sets LANG environment variable in Vim from Mac locale | |
571 */ | |
572 void | |
573 mac_lang_init() { | |
574 if (mch_getenv((char_u *)"LANG") == NULL) | |
575 { | |
576 char buf[20]; | |
577 if (LocaleRefGetPartString(NULL, | |
578 kLocaleLanguageMask | kLocaleLanguageVariantMask | | |
579 kLocaleRegionMask | kLocaleRegionVariantMask, | |
580 sizeof buf, buf) == noErr && *buf) | |
581 { | |
582 vim_setenv((char_u *)"LANG", (char_u *)buf); | |
583 # ifdef HAVE_LOCALE_H | |
584 setlocale(LC_ALL, ""); | |
585 # endif | |
586 } | |
587 } | |
588 } | |
766 | 589 #endif /* MACOS_CONVERT */ |