18
|
1 /* vi:set ts=8 sts=4 sw=4:
|
|
2 *
|
|
3 * VIM - Vi IMproved by Bram Moolenaar
|
|
4 *
|
|
5 * Do ":help uganda" in Vim to read copying and usage conditions.
|
|
6 * Do ":help credits" in Vim to see a list of people who contributed.
|
|
7 * See README.txt for an overview of the Vim source code.
|
|
8 */
|
|
9 /*
|
|
10 * os_mac_conv.c: Code specifically for Mac string conversions.
|
|
11 *
|
|
12 * This code has been put in a separate file to avoid the conflicts that are
|
|
13 * caused by including both the X11 and Carbon header files.
|
|
14 */
|
|
15
|
|
16 #define NO_X11_INCLUDES
|
|
17 #include "vim.h"
|
|
18
|
20
|
19 #ifdef FEAT_MBYTE
|
168
|
20 static char_u *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen));
|
|
21 static UniChar *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen));
|
|
22
|
|
23 /* Converter for composing decomposed HFS+ file paths.  Created by
 * mac_conv_init() and used by mac_precompose_path(); NULL when it could not
 * be created or after mac_conv_cleanup(). */
|
|
24 static TECObjectRef gPathConverter;
|
|
25 /* Converter used by mac_utf16_to_utf8().  Created by mac_conv_init(); NULL
 * when it could not be created or after mac_conv_cleanup(). */
|
|
26 static TECObjectRef gUTF16ToUTF8Converter;
|
|
27
|
18
|
28 /*
|
|
29 * A Mac version of string_convert_ext() for special cases.
|
|
30 */
|
|
31 char_u *
|
|
32 mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp)
|
|
33 char_u *ptr;
|
|
34 int len;
|
|
35 int *lenp;
|
|
36 int fail_on_error;
|
|
37 int from_enc;
|
|
38 int to_enc;
|
|
39 int *unconvlenp;
|
|
40 {
|
|
41 char_u *retval, *d;
|
|
42 CFStringRef cfstr;
|
|
43 int buflen, in, out, l, i;
|
|
44 CFStringEncoding from;
|
|
45 CFStringEncoding to;
|
|
46
|
|
47 switch (from_enc)
|
|
48 {
|
|
49 case 'l': from = kCFStringEncodingISOLatin1; break;
|
|
50 case 'm': from = kCFStringEncodingMacRoman; break;
|
|
51 case 'u': from = kCFStringEncodingUTF8; break;
|
|
52 default: return NULL;
|
|
53 }
|
|
54 switch (to_enc)
|
|
55 {
|
|
56 case 'l': to = kCFStringEncodingISOLatin1; break;
|
|
57 case 'm': to = kCFStringEncodingMacRoman; break;
|
|
58 case 'u': to = kCFStringEncodingUTF8; break;
|
|
59 default: return NULL;
|
|
60 }
|
|
61
|
|
62 if (unconvlenp != NULL)
|
|
63 *unconvlenp = 0;
|
|
64 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
|
|
65
|
168
|
66 if(cfstr == NULL)
|
|
67 fprintf(stderr, "Encoding failed\n");
|
18
|
68 /* When conversion failed, try excluding bytes from the end, helps when
|
|
69 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid
|
|
70 * looping a long time when there really is something unconvertable. */
|
|
71 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6)
|
|
72 {
|
|
73 --len;
|
|
74 ++*unconvlenp;
|
|
75 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
|
|
76 }
|
|
77 if (cfstr == NULL)
|
|
78 return NULL;
|
168
|
79
|
18
|
80 if (to == kCFStringEncodingUTF8)
|
|
81 buflen = len * 6 + 1;
|
|
82 else
|
|
83 buflen = len + 1;
|
|
84 retval = alloc(buflen);
|
|
85 if (retval == NULL)
|
|
86 {
|
|
87 CFRelease(cfstr);
|
|
88 return NULL;
|
|
89 }
|
168
|
90
|
|
91 #if 0
|
|
92 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr));
|
|
93 /* Determine output buffer size */
|
|
94 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen);
|
|
95 retval = (buflen > 0) ? alloc(buflen) : NULL;
|
|
96 if (retval == NULL) {
|
|
97 CFRelease(cfstr);
|
|
98 return NULL;
|
|
99 }
|
|
100
|
|
101 if (lenp)
|
|
102 *lenp = buflen / sizeof(char_u);
|
|
103
|
|
104 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL))
|
|
105 #endif
|
501
|
106 if (!CFStringGetCString(cfstr, (char *)retval, buflen, to))
|
18
|
107 {
|
|
108 CFRelease(cfstr);
|
|
109 if (fail_on_error)
|
|
110 {
|
|
111 vim_free(retval);
|
|
112 return NULL;
|
|
113 }
|
|
114
|
168
|
115 fprintf(stderr, "Trying char-by-char conversion...\n");
|
18
|
116 /* conversion failed for the whole string, but maybe it will work
|
|
117 * for each character */
|
|
118 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;)
|
|
119 {
|
|
120 if (from == kCFStringEncodingUTF8)
|
474
|
121 l = utf_ptr2len(ptr + in);
|
18
|
122 else
|
|
123 l = 1;
|
|
124 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0);
|
|
125 if (cfstr == NULL)
|
|
126 {
|
|
127 *d++ = '?';
|
|
128 out++;
|
|
129 }
|
|
130 else
|
|
131 {
|
501
|
132 if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to))
|
18
|
133 {
|
|
134 *d++ = '?';
|
|
135 out++;
|
|
136 }
|
|
137 else
|
|
138 {
|
501
|
139 i = STRLEN(d);
|
18
|
140 d += i;
|
|
141 out += i;
|
|
142 }
|
|
143 CFRelease(cfstr);
|
|
144 }
|
|
145 in += l;
|
|
146 }
|
|
147 *d = NUL;
|
|
148 if (lenp != NULL)
|
|
149 *lenp = out;
|
|
150 return retval;
|
|
151 }
|
|
152 CFRelease(cfstr);
|
|
153 if (lenp != NULL)
|
501
|
154 *lenp = STRLEN(retval);
|
168
|
155
|
18
|
156 return retval;
|
|
157 }
|
|
158
|
|
159 /*
|
|
160 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
|
|
161 * standard Carbon framework.
|
|
162 * Input: "ptr[*sizep]".
|
|
163 * "real_size" is the size of the buffer that "ptr" points to.
|
|
164 * output is in-place, "sizep" is adjusted.
|
|
165 * Returns OK or FAIL.
|
|
166 */
|
|
167 int
|
|
168 macroman2enc(ptr, sizep, real_size)
|
|
169 char_u *ptr;
|
|
170 long *sizep;
|
|
171 long real_size;
|
|
172 {
|
|
173 CFStringRef cfstr;
|
|
174 CFRange r;
|
|
175 CFIndex len = *sizep;
|
|
176
|
|
177 /* MacRoman is an 8-bit encoding, no need to move bytes to
|
|
178 * conv_rest[]. */
|
|
179 cfstr = CFStringCreateWithBytes(NULL, ptr, len,
|
|
180 kCFStringEncodingMacRoman, 0);
|
|
181 /*
|
|
182 * If there is a conversion error, try using another
|
|
183 * conversion.
|
|
184 */
|
|
185 if (cfstr == NULL)
|
|
186 return FAIL;
|
|
187
|
|
188 r.location = 0;
|
|
189 r.length = CFStringGetLength(cfstr);
|
|
190 if (r.length != CFStringGetBytes(cfstr, r,
|
|
191 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
|
|
192 0, /* no lossy conversion */
|
|
193 0, /* not external representation */
|
|
194 ptr + *sizep, real_size - *sizep, &len))
|
|
195 {
|
|
196 CFRelease(cfstr);
|
|
197 return FAIL;
|
|
198 }
|
|
199 CFRelease(cfstr);
|
|
200 mch_memmove(ptr, ptr + *sizep, len);
|
|
201 *sizep = len;
|
|
202
|
|
203 return OK;
|
|
204 }
|
|
205
|
|
206 /*
|
|
207 * Conversion from UTF-8 or latin1 to MacRoman.
|
|
208 * Input: "from[fromlen]"
|
|
209 * Output: "to[maxtolen]" length in "*tolenp"
|
|
210 * Unconverted rest in rest[*restlenp].
|
|
211 * Returns OK or FAIL.
|
|
212 */
|
|
213 int
|
|
214 enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp)
|
|
215 char_u *from;
|
|
216 size_t fromlen;
|
|
217 char_u *to;
|
|
218 int *tolenp;
|
|
219 int maxtolen;
|
|
220 char_u *rest;
|
|
221 int *restlenp;
|
|
222 {
|
|
223 CFStringRef cfstr;
|
|
224 CFRange r;
|
|
225 CFIndex l;
|
|
226
|
|
227 *restlenp = 0;
|
|
228 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
|
|
229 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
|
|
230 0);
|
|
231 while (cfstr == NULL && *restlenp < 3 && fromlen > 1)
|
|
232 {
|
|
233 rest[*restlenp++] = from[--fromlen];
|
|
234 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
|
|
235 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
|
|
236 0);
|
|
237 }
|
|
238 if (cfstr == NULL)
|
|
239 return FAIL;
|
|
240
|
|
241 r.location = 0;
|
|
242 r.length = CFStringGetLength(cfstr);
|
|
243 if (r.length != CFStringGetBytes(cfstr, r,
|
|
244 kCFStringEncodingMacRoman,
|
|
245 0, /* no lossy conversion */
|
|
246 0, /* not external representation (since vim
|
|
247 * handles this internally */
|
|
248 to, maxtolen, &l))
|
|
249 {
|
|
250 CFRelease(cfstr);
|
|
251 return FAIL;
|
|
252 }
|
|
253 CFRelease(cfstr);
|
|
254 *tolenp = l;
|
|
255 return OK;
|
|
256 }
|
20
|
257
|
168
|
258 /*
|
|
259 * Initializes text converters
|
|
260 */
|
|
261 void
|
|
262 mac_conv_init()
|
|
263 {
|
|
264 TextEncoding utf8_encoding;
|
|
265 TextEncoding utf8_hfsplus_encoding;
|
|
266 TextEncoding utf8_canon_encoding;
|
|
267 TextEncoding utf16_encoding;
|
|
268
|
|
269 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
|
|
270 kTextEncodingDefaultVariant, kUnicodeUTF8Format);
|
|
271 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
|
|
272 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format);
|
|
273 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
|
|
274 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format);
|
|
275 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
|
|
276 kTextEncodingDefaultVariant, kUnicode16BitFormat);
|
|
277
|
|
278 if (TECCreateConverter(&gPathConverter, utf8_encoding,
|
|
279 utf8_hfsplus_encoding) != noErr)
|
|
280 gPathConverter = NULL;
|
|
281
|
|
282 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
|
|
283 utf8_canon_encoding) != noErr)
|
179
|
284 {
|
|
285 /* On pre-10.3, Unicode normalization is not available so
|
|
286 * fall back to non-normalizing converter */
|
|
287 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
|
|
288 utf8_encoding) != noErr)
|
|
289 gUTF16ToUTF8Converter = NULL;
|
|
290 }
|
168
|
291 }
|
|
292
|
|
293 /*
|
|
294 * Destroys text converters
|
|
295 */
|
|
296 void
|
|
297 mac_conv_cleanup()
|
|
298 {
|
|
299 if (gUTF16ToUTF8Converter)
|
|
300 {
|
|
301 TECDisposeConverter(gUTF16ToUTF8Converter);
|
|
302 gUTF16ToUTF8Converter = NULL;
|
|
303 }
|
|
304
|
|
305 if (gPathConverter)
|
|
306 {
|
|
307 TECDisposeConverter(gPathConverter);
|
|
308 gPathConverter = NULL;
|
|
309 }
|
|
310 }
|
|
311
|
|
312 /*
|
|
313 * Conversion from UTF-16 UniChars to 'encoding'
|
|
314 */
|
|
315 char_u *
|
|
316 mac_utf16_to_enc(from, fromLen, actualLen)
|
|
317 UniChar *from;
|
|
318 size_t fromLen;
|
|
319 size_t *actualLen;
|
|
320 {
|
|
321 /* Following code borrows somewhat from os_mswin.c */
|
|
322 vimconv_T conv;
|
|
323 size_t utf8_len;
|
|
324 char_u *utf8_str;
|
|
325 char_u *result = NULL;
|
|
326
|
|
327 /* Convert to utf-8 first, works better with iconv */
|
|
328 utf8_len = 0;
|
|
329 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len);
|
|
330
|
|
331 if (utf8_str)
|
|
332 {
|
|
333 /* We might be called before we have p_enc set up. */
|
|
334 conv.vc_type = CONV_NONE;
|
|
335
|
|
336 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
|
|
337 * internal unicode is always utf-8) so don't convert in such cases */
|
|
338
|
|
339 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0)
|
|
340 convert_setup(&conv, (char_u *)"utf-8",
|
|
341 p_enc? p_enc: (char_u *)"macroman");
|
|
342 if (conv.vc_type == CONV_NONE)
|
|
343 {
|
|
344 /* p_enc is utf-8, so we're done. */
|
|
345 result = utf8_str;
|
|
346 }
|
|
347 else
|
|
348 {
|
|
349 result = string_convert(&conv, utf8_str, (int *)&utf8_len);
|
|
350 vim_free(utf8_str);
|
|
351 }
|
|
352
|
|
353 convert_setup(&conv, NULL, NULL);
|
|
354
|
|
355 if (actualLen)
|
|
356 *actualLen = utf8_len;
|
|
357 }
|
|
358 else if (actualLen)
|
|
359 *actualLen = 0;
|
|
360
|
|
361 return result;
|
|
362 }
|
|
363
|
|
364 /*
|
|
365 * Conversion from 'encoding' to UTF-16 UniChars
|
|
366 */
|
|
367 UniChar *
|
|
368 mac_enc_to_utf16(from, fromLen, actualLen)
|
|
369 char_u *from;
|
|
370 size_t fromLen;
|
|
371 size_t *actualLen;
|
|
372 {
|
|
373 /* Following code borrows somewhat from os_mswin.c */
|
|
374 vimconv_T conv;
|
|
375 size_t utf8_len;
|
|
376 char_u *utf8_str;
|
|
377 UniChar *result = NULL;
|
|
378 Boolean should_free_utf8 = FALSE;
|
|
379
|
|
380 do
|
|
381 {
|
|
382 /* Use MacRoman by default, we might be called before we have p_enc
|
|
383 * set up. Convert to utf-8 first, works better with iconv(). Does
|
|
384 * nothing if 'encoding' is "utf-8". */
|
|
385 conv.vc_type = CONV_NONE;
|
|
386 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 &&
|
|
387 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman",
|
|
388 (char_u *)"utf-8") == FAIL)
|
|
389 break;
|
|
390
|
|
391 if (conv.vc_type != CONV_NONE)
|
|
392 {
|
|
393 utf8_len = fromLen;
|
|
394 utf8_str = string_convert(&conv, from, (int *)&utf8_len);
|
|
395 should_free_utf8 = TRUE;
|
|
396 }
|
|
397 else
|
|
398 {
|
|
399 utf8_str = from;
|
|
400 utf8_len = fromLen;
|
|
401 }
|
|
402
|
|
403 if (utf8_str == NULL)
|
|
404 break;
|
|
405
|
|
406 convert_setup(&conv, NULL, NULL);
|
|
407
|
|
408 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen);
|
|
409
|
|
410 if (should_free_utf8)
|
|
411 vim_free(utf8_str);
|
|
412 return result;
|
|
413 }
|
|
414 while (0);
|
|
415
|
|
416 if (actualLen)
|
|
417 *actualLen = 0;
|
|
418
|
|
419 return result;
|
|
420 }
|
|
421
|
|
422 /*
|
|
423 * Converts from UTF-16 UniChars to CFString
|
|
424 */
|
|
425 CFStringRef
|
|
426 mac_enc_to_cfstring(from, fromLen)
|
|
427 char_u *from;
|
|
428 size_t fromLen;
|
|
429 {
|
|
430 UniChar *utf16_str;
|
|
431 size_t utf16_len;
|
|
432 CFStringRef result = NULL;
|
|
433
|
|
434 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len);
|
|
435 if (utf16_str)
|
|
436 {
|
|
437 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar));
|
|
438 vim_free(utf16_str);
|
|
439 }
|
|
440
|
|
441 return result;
|
|
442 }
|
|
443
|
|
444 /*
|
|
445 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
|
|
446 */
|
|
447 char_u *
|
|
448 mac_precompose_path(decompPath, decompLen, precompLen)
|
|
449 char_u *decompPath;
|
|
450 size_t decompLen;
|
|
451 size_t *precompLen;
|
|
452 {
|
|
453 char_u *result = NULL;
|
|
454 size_t actualLen = 0;
|
|
455
|
|
456 if (gPathConverter)
|
|
457 {
|
|
458 result = alloc(decompLen);
|
|
459 if (result)
|
|
460 {
|
|
461 if (TECConvertText(gPathConverter, decompPath,
|
|
462 decompLen, &decompLen, result,
|
|
463 decompLen, &actualLen) != noErr)
|
|
464 {
|
|
465 vim_free(result);
|
|
466 result = NULL;
|
|
467 }
|
|
468 }
|
|
469 }
|
|
470
|
|
471 if (precompLen)
|
|
472 *precompLen = actualLen;
|
|
473
|
|
474 return result;
|
|
475 }
|
|
476
|
|
477 /*
|
|
478 * Converts from UTF-16 UniChars to precomposed UTF-8
|
|
479 */
|
|
480 char_u *
|
|
481 mac_utf16_to_utf8(from, fromLen, actualLen)
|
|
482 UniChar *from;
|
|
483 size_t fromLen;
|
|
484 size_t *actualLen;
|
|
485 {
|
|
486 ByteCount utf8_len;
|
|
487 ByteCount inputRead;
|
|
488 char_u *result;
|
|
489
|
|
490 if (gUTF16ToUTF8Converter)
|
|
491 {
|
|
492 result = alloc(fromLen * 6 + 1);
|
|
493 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from,
|
|
494 fromLen, &inputRead, result,
|
|
495 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr)
|
|
496 {
|
|
497 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead);
|
|
498 utf8_len += inputRead;
|
|
499 }
|
|
500 else
|
|
501 {
|
|
502 vim_free(result);
|
|
503 result = NULL;
|
|
504 }
|
|
505 }
|
|
506 else
|
|
507 {
|
|
508 result = NULL;
|
|
509 }
|
|
510
|
|
511 if (actualLen)
|
|
512 *actualLen = result ? utf8_len : 0;
|
|
513
|
|
514 return result;
|
|
515 }
|
|
516
|
|
517 /*
|
|
518 * Converts from UTF-8 to UTF-16 UniChars
|
|
519 */
|
|
520 UniChar *
|
|
521 mac_utf8_to_utf16(from, fromLen, actualLen)
|
|
522 char_u *from;
|
|
523 size_t fromLen;
|
|
524 size_t *actualLen;
|
|
525 {
|
|
526 CFStringRef utf8_str;
|
|
527 CFRange convertRange;
|
|
528 UniChar *result = NULL;
|
|
529
|
|
530 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen,
|
|
531 kCFStringEncodingUTF8, FALSE);
|
|
532
|
|
533 if (utf8_str == NULL) {
|
|
534 if (actualLen)
|
|
535 *actualLen = 0;
|
|
536 return NULL;
|
|
537 }
|
|
538
|
|
539 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str));
|
|
540 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar));
|
|
541
|
|
542 CFStringGetCharacters(utf8_str, convertRange, result);
|
|
543
|
|
544 CFRelease(utf8_str);
|
|
545
|
|
546 if (actualLen)
|
|
547 *actualLen = convertRange.length * sizeof(UniChar);
|
|
548
|
|
549 return result;
|
|
550 }
|
20
|
551 #endif /* FEAT_MBYTE */
|