18
|
1 /* vi:set ts=8 sts=4 sw=4:
|
|
2 *
|
|
3 * VIM - Vi IMproved by Bram Moolenaar
|
|
4 *
|
|
5 * Do ":help uganda" in Vim to read copying and usage conditions.
|
|
6 * Do ":help credits" in Vim to see a list of people who contributed.
|
|
7 * See README.txt for an overview of the Vim source code.
|
|
8 */
|
|
9 /*
|
|
10 * os_mac_conv.c: Code specifically for Mac string conversions.
|
|
11 *
|
|
12 * This code has been put in a separate file to avoid the conflicts that are
|
|
13 * caused by including both the X11 and Carbon header files.
|
|
14 */
|
|
15
|
|
16 #define NO_X11_INCLUDES
|
|
17 #include "vim.h"
|
|
18
|
20
|
19 #ifdef FEAT_MBYTE
|
18
|
20 extern char_u *mac_string_convert __ARGS((char_u *ptr, int len, int *lenp, int fail_on_error, int from, int to, int *unconvlenp));
|
|
21 extern int macroman2enc __ARGS((char_u *ptr, long *sizep, long real_size));
|
|
22 extern int enc2macroman __ARGS((char_u *from, size_t fromlen, char_u *to, int *tolenp, int maxtolen, char_u *rest, int *restlenp));
|
|
23
|
168
|
24 extern void mac_conv_init __ARGS((void));
|
|
25 extern void mac_conv_cleanup __ARGS((void));
|
|
26 extern char_u *mac_utf16_to_enc __ARGS((UniChar *from, size_t fromLen, size_t *actualLen));
|
|
27 extern UniChar *mac_enc_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen));
|
|
28 extern CFStringRef mac_enc_to_cfstring __ARGS((char_u *from, size_t fromLen));
|
|
29 extern char_u *mac_precompose_path __ARGS((char_u *decompPath, size_t decompLen, size_t *precompLen));
|
|
30
|
|
31 static char_u *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen));
|
|
32 static UniChar *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen));
|
|
33
|
|
34 /* Converter for composing decomposed HFS+ file paths */
|
|
35 static TECObjectRef gPathConverter;
|
|
36 /* Converter used by mac_utf16_to_utf8 */
|
|
37 static TECObjectRef gUTF16ToUTF8Converter;
|
|
38
|
18
|
39 /*
|
|
40 * A Mac version of string_convert_ext() for special cases.
|
|
41 */
|
|
42 char_u *
|
|
43 mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp)
|
|
44 char_u *ptr;
|
|
45 int len;
|
|
46 int *lenp;
|
|
47 int fail_on_error;
|
|
48 int from_enc;
|
|
49 int to_enc;
|
|
50 int *unconvlenp;
|
|
51 {
|
|
52 char_u *retval, *d;
|
|
53 CFStringRef cfstr;
|
|
54 int buflen, in, out, l, i;
|
|
55 CFStringEncoding from;
|
|
56 CFStringEncoding to;
|
|
57
|
|
58 switch (from_enc)
|
|
59 {
|
|
60 case 'l': from = kCFStringEncodingISOLatin1; break;
|
|
61 case 'm': from = kCFStringEncodingMacRoman; break;
|
|
62 case 'u': from = kCFStringEncodingUTF8; break;
|
|
63 default: return NULL;
|
|
64 }
|
|
65 switch (to_enc)
|
|
66 {
|
|
67 case 'l': to = kCFStringEncodingISOLatin1; break;
|
|
68 case 'm': to = kCFStringEncodingMacRoman; break;
|
|
69 case 'u': to = kCFStringEncodingUTF8; break;
|
|
70 default: return NULL;
|
|
71 }
|
|
72
|
|
73 if (unconvlenp != NULL)
|
|
74 *unconvlenp = 0;
|
|
75 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
|
|
76
|
168
|
77 if(cfstr == NULL)
|
|
78 fprintf(stderr, "Encoding failed\n");
|
18
|
79 /* When conversion failed, try excluding bytes from the end, helps when
|
|
80 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid
|
|
81 * looping a long time when there really is something unconvertable. */
|
|
82 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6)
|
|
83 {
|
|
84 --len;
|
|
85 ++*unconvlenp;
|
|
86 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
|
|
87 }
|
|
88 if (cfstr == NULL)
|
|
89 return NULL;
|
168
|
90
|
18
|
91 if (to == kCFStringEncodingUTF8)
|
|
92 buflen = len * 6 + 1;
|
|
93 else
|
|
94 buflen = len + 1;
|
|
95 retval = alloc(buflen);
|
|
96 if (retval == NULL)
|
|
97 {
|
|
98 CFRelease(cfstr);
|
|
99 return NULL;
|
|
100 }
|
168
|
101
|
|
102 #if 0
|
|
103 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr));
|
|
104 /* Determine output buffer size */
|
|
105 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen);
|
|
106 retval = (buflen > 0) ? alloc(buflen) : NULL;
|
|
107 if (retval == NULL) {
|
|
108 CFRelease(cfstr);
|
|
109 return NULL;
|
|
110 }
|
|
111
|
|
112 if (lenp)
|
|
113 *lenp = buflen / sizeof(char_u);
|
|
114
|
|
115 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL))
|
|
116 #endif
|
18
|
117 if (!CFStringGetCString(cfstr, retval, buflen, to))
|
|
118 {
|
|
119 CFRelease(cfstr);
|
|
120 if (fail_on_error)
|
|
121 {
|
|
122 vim_free(retval);
|
|
123 return NULL;
|
|
124 }
|
|
125
|
168
|
126 fprintf(stderr, "Trying char-by-char conversion...\n");
|
18
|
127 /* conversion failed for the whole string, but maybe it will work
|
|
128 * for each character */
|
|
129 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;)
|
|
130 {
|
|
131 if (from == kCFStringEncodingUTF8)
|
|
132 l = utf_ptr2len_check(ptr + in);
|
|
133 else
|
|
134 l = 1;
|
|
135 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0);
|
|
136 if (cfstr == NULL)
|
|
137 {
|
|
138 *d++ = '?';
|
|
139 out++;
|
|
140 }
|
|
141 else
|
|
142 {
|
|
143 if (!CFStringGetCString(cfstr, d, buflen - out, to))
|
|
144 {
|
|
145 *d++ = '?';
|
|
146 out++;
|
|
147 }
|
|
148 else
|
|
149 {
|
|
150 i = strlen(d);
|
|
151 d += i;
|
|
152 out += i;
|
|
153 }
|
|
154 CFRelease(cfstr);
|
|
155 }
|
|
156 in += l;
|
|
157 }
|
|
158 *d = NUL;
|
|
159 if (lenp != NULL)
|
|
160 *lenp = out;
|
|
161 return retval;
|
|
162 }
|
|
163 CFRelease(cfstr);
|
|
164 if (lenp != NULL)
|
|
165 *lenp = strlen(retval);
|
168
|
166
|
18
|
167 return retval;
|
|
168 }
|
|
169
|
|
170 /*
|
|
171 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
|
|
172 * standard Carbon framework.
|
|
173 * Input: "ptr[*sizep]".
|
|
174 * "real_size" is the size of the buffer that "ptr" points to.
|
|
175 * output is in-place, "sizep" is adjusted.
|
|
176 * Returns OK or FAIL.
|
|
177 */
|
|
178 int
|
|
179 macroman2enc(ptr, sizep, real_size)
|
|
180 char_u *ptr;
|
|
181 long *sizep;
|
|
182 long real_size;
|
|
183 {
|
|
184 CFStringRef cfstr;
|
|
185 CFRange r;
|
|
186 CFIndex len = *sizep;
|
|
187
|
|
188 /* MacRoman is an 8-bit encoding, no need to move bytes to
|
|
189 * conv_rest[]. */
|
|
190 cfstr = CFStringCreateWithBytes(NULL, ptr, len,
|
|
191 kCFStringEncodingMacRoman, 0);
|
|
192 /*
|
|
193 * If there is a conversion error, try using another
|
|
194 * conversion.
|
|
195 */
|
|
196 if (cfstr == NULL)
|
|
197 return FAIL;
|
|
198
|
|
199 r.location = 0;
|
|
200 r.length = CFStringGetLength(cfstr);
|
|
201 if (r.length != CFStringGetBytes(cfstr, r,
|
|
202 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
|
|
203 0, /* no lossy conversion */
|
|
204 0, /* not external representation */
|
|
205 ptr + *sizep, real_size - *sizep, &len))
|
|
206 {
|
|
207 CFRelease(cfstr);
|
|
208 return FAIL;
|
|
209 }
|
|
210 CFRelease(cfstr);
|
|
211 mch_memmove(ptr, ptr + *sizep, len);
|
|
212 *sizep = len;
|
|
213
|
|
214 return OK;
|
|
215 }
|
|
216
|
|
217 /*
|
|
218 * Conversion from UTF-8 or latin1 to MacRoman.
|
|
219 * Input: "from[fromlen]"
|
|
220 * Output: "to[maxtolen]" length in "*tolenp"
|
|
221 * Unconverted rest in rest[*restlenp].
|
|
222 * Returns OK or FAIL.
|
|
223 */
|
|
224 int
|
|
225 enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp)
|
|
226 char_u *from;
|
|
227 size_t fromlen;
|
|
228 char_u *to;
|
|
229 int *tolenp;
|
|
230 int maxtolen;
|
|
231 char_u *rest;
|
|
232 int *restlenp;
|
|
233 {
|
|
234 CFStringRef cfstr;
|
|
235 CFRange r;
|
|
236 CFIndex l;
|
|
237
|
|
238 *restlenp = 0;
|
|
239 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
|
|
240 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
|
|
241 0);
|
|
242 while (cfstr == NULL && *restlenp < 3 && fromlen > 1)
|
|
243 {
|
|
244 rest[*restlenp++] = from[--fromlen];
|
|
245 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
|
|
246 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
|
|
247 0);
|
|
248 }
|
|
249 if (cfstr == NULL)
|
|
250 return FAIL;
|
|
251
|
|
252 r.location = 0;
|
|
253 r.length = CFStringGetLength(cfstr);
|
|
254 if (r.length != CFStringGetBytes(cfstr, r,
|
|
255 kCFStringEncodingMacRoman,
|
|
256 0, /* no lossy conversion */
|
|
257 0, /* not external representation (since vim
|
|
258 * handles this internally */
|
|
259 to, maxtolen, &l))
|
|
260 {
|
|
261 CFRelease(cfstr);
|
|
262 return FAIL;
|
|
263 }
|
|
264 CFRelease(cfstr);
|
|
265 *tolenp = l;
|
|
266 return OK;
|
|
267 }
|
20
|
268
|
168
|
269 /*
|
|
270 * Initializes text converters
|
|
271 */
|
|
272 void
|
|
273 mac_conv_init()
|
|
274 {
|
|
275 TextEncoding utf8_encoding;
|
|
276 TextEncoding utf8_hfsplus_encoding;
|
|
277 TextEncoding utf8_canon_encoding;
|
|
278 TextEncoding utf16_encoding;
|
|
279
|
|
280 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
|
|
281 kTextEncodingDefaultVariant, kUnicodeUTF8Format);
|
|
282 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
|
|
283 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format);
|
|
284 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
|
|
285 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format);
|
|
286 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
|
|
287 kTextEncodingDefaultVariant, kUnicode16BitFormat);
|
|
288
|
|
289 if (TECCreateConverter(&gPathConverter, utf8_encoding,
|
|
290 utf8_hfsplus_encoding) != noErr)
|
|
291 gPathConverter = NULL;
|
|
292
|
|
293 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
|
|
294 utf8_canon_encoding) != noErr)
|
179
|
295 {
|
|
296 /* On pre-10.3, Unicode normalization is not available so
|
|
297 * fall back to non-normalizing converter */
|
|
298 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
|
|
299 utf8_encoding) != noErr)
|
|
300 gUTF16ToUTF8Converter = NULL;
|
|
301 }
|
168
|
302 }
|
|
303
|
|
304 /*
|
|
305 * Destroys text converters
|
|
306 */
|
|
307 void
|
|
308 mac_conv_cleanup()
|
|
309 {
|
|
310 if (gUTF16ToUTF8Converter)
|
|
311 {
|
|
312 TECDisposeConverter(gUTF16ToUTF8Converter);
|
|
313 gUTF16ToUTF8Converter = NULL;
|
|
314 }
|
|
315
|
|
316 if (gPathConverter)
|
|
317 {
|
|
318 TECDisposeConverter(gPathConverter);
|
|
319 gPathConverter = NULL;
|
|
320 }
|
|
321 }
|
|
322
|
|
323 /*
|
|
324 * Conversion from UTF-16 UniChars to 'encoding'
|
|
325 */
|
|
326 char_u *
|
|
327 mac_utf16_to_enc(from, fromLen, actualLen)
|
|
328 UniChar *from;
|
|
329 size_t fromLen;
|
|
330 size_t *actualLen;
|
|
331 {
|
|
332 /* Following code borrows somewhat from os_mswin.c */
|
|
333 vimconv_T conv;
|
|
334 size_t utf8_len;
|
|
335 char_u *utf8_str;
|
|
336 char_u *result = NULL;
|
|
337
|
|
338 /* Convert to utf-8 first, works better with iconv */
|
|
339 utf8_len = 0;
|
|
340 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len);
|
|
341
|
|
342 if (utf8_str)
|
|
343 {
|
|
344 /* We might be called before we have p_enc set up. */
|
|
345 conv.vc_type = CONV_NONE;
|
|
346
|
|
347 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
|
|
348 * internal unicode is always utf-8) so don't convert in such cases */
|
|
349
|
|
350 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0)
|
|
351 convert_setup(&conv, (char_u *)"utf-8",
|
|
352 p_enc? p_enc: (char_u *)"macroman");
|
|
353 if (conv.vc_type == CONV_NONE)
|
|
354 {
|
|
355 /* p_enc is utf-8, so we're done. */
|
|
356 result = utf8_str;
|
|
357 }
|
|
358 else
|
|
359 {
|
|
360 result = string_convert(&conv, utf8_str, (int *)&utf8_len);
|
|
361 vim_free(utf8_str);
|
|
362 }
|
|
363
|
|
364 convert_setup(&conv, NULL, NULL);
|
|
365
|
|
366 if (actualLen)
|
|
367 *actualLen = utf8_len;
|
|
368 }
|
|
369 else if (actualLen)
|
|
370 *actualLen = 0;
|
|
371
|
|
372 return result;
|
|
373 }
|
|
374
|
|
375 /*
|
|
376 * Conversion from 'encoding' to UTF-16 UniChars
|
|
377 */
|
|
378 UniChar *
|
|
379 mac_enc_to_utf16(from, fromLen, actualLen)
|
|
380 char_u *from;
|
|
381 size_t fromLen;
|
|
382 size_t *actualLen;
|
|
383 {
|
|
384 /* Following code borrows somewhat from os_mswin.c */
|
|
385 vimconv_T conv;
|
|
386 size_t utf8_len;
|
|
387 char_u *utf8_str;
|
|
388 UniChar *result = NULL;
|
|
389 Boolean should_free_utf8 = FALSE;
|
|
390
|
|
391 do
|
|
392 {
|
|
393 /* Use MacRoman by default, we might be called before we have p_enc
|
|
394 * set up. Convert to utf-8 first, works better with iconv(). Does
|
|
395 * nothing if 'encoding' is "utf-8". */
|
|
396 conv.vc_type = CONV_NONE;
|
|
397 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 &&
|
|
398 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman",
|
|
399 (char_u *)"utf-8") == FAIL)
|
|
400 break;
|
|
401
|
|
402 if (conv.vc_type != CONV_NONE)
|
|
403 {
|
|
404 utf8_len = fromLen;
|
|
405 utf8_str = string_convert(&conv, from, (int *)&utf8_len);
|
|
406 should_free_utf8 = TRUE;
|
|
407 }
|
|
408 else
|
|
409 {
|
|
410 utf8_str = from;
|
|
411 utf8_len = fromLen;
|
|
412 }
|
|
413
|
|
414 if (utf8_str == NULL)
|
|
415 break;
|
|
416
|
|
417 convert_setup(&conv, NULL, NULL);
|
|
418
|
|
419 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen);
|
|
420
|
|
421 if (should_free_utf8)
|
|
422 vim_free(utf8_str);
|
|
423 return result;
|
|
424 }
|
|
425 while (0);
|
|
426
|
|
427 if (actualLen)
|
|
428 *actualLen = 0;
|
|
429
|
|
430 return result;
|
|
431 }
|
|
432
|
|
433 /*
|
|
434 * Converts from UTF-16 UniChars to CFString
|
|
435 */
|
|
436 CFStringRef
|
|
437 mac_enc_to_cfstring(from, fromLen)
|
|
438 char_u *from;
|
|
439 size_t fromLen;
|
|
440 {
|
|
441 UniChar *utf16_str;
|
|
442 size_t utf16_len;
|
|
443 CFStringRef result = NULL;
|
|
444
|
|
445 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len);
|
|
446 if (utf16_str)
|
|
447 {
|
|
448 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar));
|
|
449 vim_free(utf16_str);
|
|
450 }
|
|
451
|
|
452 return result;
|
|
453 }
|
|
454
|
|
455 /*
|
|
456 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
|
|
457 */
|
|
458 char_u *
|
|
459 mac_precompose_path(decompPath, decompLen, precompLen)
|
|
460 char_u *decompPath;
|
|
461 size_t decompLen;
|
|
462 size_t *precompLen;
|
|
463 {
|
|
464 char_u *result = NULL;
|
|
465 size_t actualLen = 0;
|
|
466
|
|
467 if (gPathConverter)
|
|
468 {
|
|
469 result = alloc(decompLen);
|
|
470 if (result)
|
|
471 {
|
|
472 if (TECConvertText(gPathConverter, decompPath,
|
|
473 decompLen, &decompLen, result,
|
|
474 decompLen, &actualLen) != noErr)
|
|
475 {
|
|
476 vim_free(result);
|
|
477 result = NULL;
|
|
478 }
|
|
479 }
|
|
480 }
|
|
481
|
|
482 if (precompLen)
|
|
483 *precompLen = actualLen;
|
|
484
|
|
485 return result;
|
|
486 }
|
|
487
|
|
488 /*
|
|
489 * Converts from UTF-16 UniChars to precomposed UTF-8
|
|
490 */
|
|
491 char_u *
|
|
492 mac_utf16_to_utf8(from, fromLen, actualLen)
|
|
493 UniChar *from;
|
|
494 size_t fromLen;
|
|
495 size_t *actualLen;
|
|
496 {
|
|
497 ByteCount utf8_len;
|
|
498 ByteCount inputRead;
|
|
499 char_u *result;
|
|
500
|
|
501 if (gUTF16ToUTF8Converter)
|
|
502 {
|
|
503 result = alloc(fromLen * 6 + 1);
|
|
504 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from,
|
|
505 fromLen, &inputRead, result,
|
|
506 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr)
|
|
507 {
|
|
508 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead);
|
|
509 utf8_len += inputRead;
|
|
510 }
|
|
511 else
|
|
512 {
|
|
513 vim_free(result);
|
|
514 result = NULL;
|
|
515 }
|
|
516 }
|
|
517 else
|
|
518 {
|
|
519 result = NULL;
|
|
520 }
|
|
521
|
|
522 if (actualLen)
|
|
523 *actualLen = result ? utf8_len : 0;
|
|
524
|
|
525 return result;
|
|
526 }
|
|
527
|
|
528 /*
|
|
529 * Converts from UTF-8 to UTF-16 UniChars
|
|
530 */
|
|
531 UniChar *
|
|
532 mac_utf8_to_utf16(from, fromLen, actualLen)
|
|
533 char_u *from;
|
|
534 size_t fromLen;
|
|
535 size_t *actualLen;
|
|
536 {
|
|
537 CFStringRef utf8_str;
|
|
538 CFRange convertRange;
|
|
539 UniChar *result = NULL;
|
|
540
|
|
541 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen,
|
|
542 kCFStringEncodingUTF8, FALSE);
|
|
543
|
|
544 if (utf8_str == NULL) {
|
|
545 if (actualLen)
|
|
546 *actualLen = 0;
|
|
547 return NULL;
|
|
548 }
|
|
549
|
|
550 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str));
|
|
551 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar));
|
|
552
|
|
553 CFStringGetCharacters(utf8_str, convertRange, result);
|
|
554
|
|
555 CFRelease(utf8_str);
|
|
556
|
|
557 if (actualLen)
|
|
558 *actualLen = convertRange.length * sizeof(UniChar);
|
|
559
|
|
560 return result;
|
|
561 }
|
20
|
562 #endif /* FEAT_MBYTE */
|