Mercurial > vim
comparison src/os_mac_conv.c @ 168:4d9eabb1396e
updated for version 7.0051
author | vimboss |
---|---|
date | Tue, 22 Feb 2005 08:49:11 +0000 |
parents | 4ac1dce8dd5e |
children | 7fd70926e2e1 |
comparison
equal
deleted
inserted
replaced
167:c93c9cad9618 | 168:4d9eabb1396e |
---|---|
18 | 18 |
19 #ifdef FEAT_MBYTE | 19 #ifdef FEAT_MBYTE |
20 extern char_u *mac_string_convert __ARGS((char_u *ptr, int len, int *lenp, int fail_on_error, int from, int to, int *unconvlenp)); | 20 extern char_u *mac_string_convert __ARGS((char_u *ptr, int len, int *lenp, int fail_on_error, int from, int to, int *unconvlenp)); |
21 extern int macroman2enc __ARGS((char_u *ptr, long *sizep, long real_size)); | 21 extern int macroman2enc __ARGS((char_u *ptr, long *sizep, long real_size)); |
22 extern int enc2macroman __ARGS((char_u *from, size_t fromlen, char_u *to, int *tolenp, int maxtolen, char_u *rest, int *restlenp)); | 22 extern int enc2macroman __ARGS((char_u *from, size_t fromlen, char_u *to, int *tolenp, int maxtolen, char_u *rest, int *restlenp)); |
23 | |
24 extern void mac_conv_init __ARGS((void)); | |
25 extern void mac_conv_cleanup __ARGS((void)); | |
26 extern char_u *mac_utf16_to_enc __ARGS((UniChar *from, size_t fromLen, size_t *actualLen)); | |
27 extern UniChar *mac_enc_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen)); | |
28 extern CFStringRef mac_enc_to_cfstring __ARGS((char_u *from, size_t fromLen)); | |
29 extern char_u *mac_precompose_path __ARGS((char_u *decompPath, size_t decompLen, size_t *precompLen)); | |
30 | |
31 static char_u *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen)); | |
32 static UniChar *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen)); | |
33 | |
34 /* Converter for composing decomposed HFS+ file paths */ | |
35 static TECObjectRef gPathConverter; | |
36 /* Converter used by mac_utf16_to_utf8 */ | |
37 static TECObjectRef gUTF16ToUTF8Converter; | |
23 | 38 |
24 /* | 39 /* |
25 * A Mac version of string_convert_ext() for special cases. | 40 * A Mac version of string_convert_ext() for special cases. |
26 */ | 41 */ |
27 char_u * | 42 char_u * |
57 | 72 |
58 if (unconvlenp != NULL) | 73 if (unconvlenp != NULL) |
59 *unconvlenp = 0; | 74 *unconvlenp = 0; |
60 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); | 75 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); |
61 | 76 |
77 if(cfstr == NULL) | |
78 fprintf(stderr, "Encoding failed\n"); | |
62 /* When conversion failed, try excluding bytes from the end, helps when | 79 /* When conversion failed, try excluding bytes from the end, helps when |
63 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid | 80 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid |
64 * looping a long time when there really is something unconvertable. */ | 81 * looping a long time when there really is something unconvertable. */ |
65 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6) | 82 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6) |
66 { | 83 { |
68 ++*unconvlenp; | 85 ++*unconvlenp; |
69 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); | 86 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); |
70 } | 87 } |
71 if (cfstr == NULL) | 88 if (cfstr == NULL) |
72 return NULL; | 89 return NULL; |
90 | |
73 if (to == kCFStringEncodingUTF8) | 91 if (to == kCFStringEncodingUTF8) |
74 buflen = len * 6 + 1; | 92 buflen = len * 6 + 1; |
75 else | 93 else |
76 buflen = len + 1; | 94 buflen = len + 1; |
77 retval = alloc(buflen); | 95 retval = alloc(buflen); |
78 if (retval == NULL) | 96 if (retval == NULL) |
79 { | 97 { |
80 CFRelease(cfstr); | 98 CFRelease(cfstr); |
81 return NULL; | 99 return NULL; |
82 } | 100 } |
101 | |
102 #if 0 | |
103 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr)); | |
104 /* Determine output buffer size */ | |
105 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen); | |
106 retval = (buflen > 0) ? alloc(buflen) : NULL; | |
107 if (retval == NULL) { | |
108 CFRelease(cfstr); | |
109 return NULL; | |
110 } | |
111 | |
112 if (lenp) | |
113 *lenp = buflen / sizeof(char_u); | |
114 | |
115 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL)) | |
116 #endif | |
83 if (!CFStringGetCString(cfstr, retval, buflen, to)) | 117 if (!CFStringGetCString(cfstr, retval, buflen, to)) |
84 { | 118 { |
85 CFRelease(cfstr); | 119 CFRelease(cfstr); |
86 if (fail_on_error) | 120 if (fail_on_error) |
87 { | 121 { |
88 vim_free(retval); | 122 vim_free(retval); |
89 return NULL; | 123 return NULL; |
90 } | 124 } |
91 | 125 |
126 fprintf(stderr, "Trying char-by-char conversion...\n"); | |
92 /* conversion failed for the whole string, but maybe it will work | 127 /* conversion failed for the whole string, but maybe it will work |
93 * for each character */ | 128 * for each character */ |
94 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;) | 129 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;) |
95 { | 130 { |
96 if (from == kCFStringEncodingUTF8) | 131 if (from == kCFStringEncodingUTF8) |
126 return retval; | 161 return retval; |
127 } | 162 } |
128 CFRelease(cfstr); | 163 CFRelease(cfstr); |
129 if (lenp != NULL) | 164 if (lenp != NULL) |
130 *lenp = strlen(retval); | 165 *lenp = strlen(retval); |
166 | |
131 return retval; | 167 return retval; |
132 } | 168 } |
133 | 169 |
134 /* | 170 /* |
135 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using | 171 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using |
228 CFRelease(cfstr); | 264 CFRelease(cfstr); |
229 *tolenp = l; | 265 *tolenp = l; |
230 return OK; | 266 return OK; |
231 } | 267 } |
232 | 268 |
269 /* | |
270 * Initializes text converters | |
271 */ | |
272 void | |
273 mac_conv_init() | |
274 { | |
275 TextEncoding utf8_encoding; | |
276 TextEncoding utf8_hfsplus_encoding; | |
277 TextEncoding utf8_canon_encoding; | |
278 TextEncoding utf16_encoding; | |
279 | |
280 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
281 kTextEncodingDefaultVariant, kUnicodeUTF8Format); | |
282 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
283 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format); | |
284 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
285 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format); | |
286 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, | |
287 kTextEncodingDefaultVariant, kUnicode16BitFormat); | |
288 | |
289 if (TECCreateConverter(&gPathConverter, utf8_encoding, | |
290 utf8_hfsplus_encoding) != noErr) | |
291 gPathConverter = NULL; | |
292 | |
293 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding, | |
294 utf8_canon_encoding) != noErr) | |
295 gUTF16ToUTF8Converter = NULL; | |
296 } | |
297 | |
298 /* | |
299 * Destroys text converters | |
300 */ | |
301 void | |
302 mac_conv_cleanup() | |
303 { | |
304 if (gUTF16ToUTF8Converter) | |
305 { | |
306 TECDisposeConverter(gUTF16ToUTF8Converter); | |
307 gUTF16ToUTF8Converter = NULL; | |
308 } | |
309 | |
310 if (gPathConverter) | |
311 { | |
312 TECDisposeConverter(gPathConverter); | |
313 gPathConverter = NULL; | |
314 } | |
315 } | |
316 | |
317 /* | |
318 * Conversion from UTF-16 UniChars to 'encoding' | |
319 */ | |
320 char_u * | |
321 mac_utf16_to_enc(from, fromLen, actualLen) | |
322 UniChar *from; | |
323 size_t fromLen; | |
324 size_t *actualLen; | |
325 { | |
326 /* Following code borrows somewhat from os_mswin.c */ | |
327 vimconv_T conv; | |
328 size_t utf8_len; | |
329 char_u *utf8_str; | |
330 char_u *result = NULL; | |
331 | |
332 /* Convert to utf-8 first, works better with iconv */ | |
333 utf8_len = 0; | |
334 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len); | |
335 | |
336 if (utf8_str) | |
337 { | |
338 /* We might be called before we have p_enc set up. */ | |
339 conv.vc_type = CONV_NONE; | |
340 | |
341 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim | |
342 * internal unicode is always utf-8) so don't convert in such cases */ | |
343 | |
344 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0) | |
345 convert_setup(&conv, (char_u *)"utf-8", | |
346 p_enc? p_enc: (char_u *)"macroman"); | |
347 if (conv.vc_type == CONV_NONE) | |
348 { | |
349 /* p_enc is utf-8, so we're done. */ | |
350 result = utf8_str; | |
351 } | |
352 else | |
353 { | |
354 result = string_convert(&conv, utf8_str, (int *)&utf8_len); | |
355 vim_free(utf8_str); | |
356 } | |
357 | |
358 convert_setup(&conv, NULL, NULL); | |
359 | |
360 if (actualLen) | |
361 *actualLen = utf8_len; | |
362 } | |
363 else if (actualLen) | |
364 *actualLen = 0; | |
365 | |
366 return result; | |
367 } | |
368 | |
369 /* | |
370 * Conversion from 'encoding' to UTF-16 UniChars | |
371 */ | |
372 UniChar * | |
373 mac_enc_to_utf16(from, fromLen, actualLen) | |
374 char_u *from; | |
375 size_t fromLen; | |
376 size_t *actualLen; | |
377 { | |
378 /* Following code borrows somewhat from os_mswin.c */ | |
379 vimconv_T conv; | |
380 size_t utf8_len; | |
381 char_u *utf8_str; | |
382 UniChar *result = NULL; | |
383 Boolean should_free_utf8 = FALSE; | |
384 | |
385 do | |
386 { | |
387 /* Use MacRoman by default, we might be called before we have p_enc | |
388 * set up. Convert to utf-8 first, works better with iconv(). Does | |
389 * nothing if 'encoding' is "utf-8". */ | |
390 conv.vc_type = CONV_NONE; | |
391 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 && | |
392 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman", | |
393 (char_u *)"utf-8") == FAIL) | |
394 break; | |
395 | |
396 if (conv.vc_type != CONV_NONE) | |
397 { | |
398 utf8_len = fromLen; | |
399 utf8_str = string_convert(&conv, from, (int *)&utf8_len); | |
400 should_free_utf8 = TRUE; | |
401 } | |
402 else | |
403 { | |
404 utf8_str = from; | |
405 utf8_len = fromLen; | |
406 } | |
407 | |
408 if (utf8_str == NULL) | |
409 break; | |
410 | |
411 convert_setup(&conv, NULL, NULL); | |
412 | |
413 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen); | |
414 | |
415 if (should_free_utf8) | |
416 vim_free(utf8_str); | |
417 return result; | |
418 } | |
419 while (0); | |
420 | |
421 if (actualLen) | |
422 *actualLen = 0; | |
423 | |
424 return result; | |
425 } | |
426 | |
427 /* | |
428 * Converts from UTF-16 UniChars to CFString | |
429 */ | |
430 CFStringRef | |
431 mac_enc_to_cfstring(from, fromLen) | |
432 char_u *from; | |
433 size_t fromLen; | |
434 { | |
435 UniChar *utf16_str; | |
436 size_t utf16_len; | |
437 CFStringRef result = NULL; | |
438 | |
439 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len); | |
440 if (utf16_str) | |
441 { | |
442 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar)); | |
443 vim_free(utf16_str); | |
444 } | |
445 | |
446 return result; | |
447 } | |
448 | |
449 /* | |
450 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8 | |
451 */ | |
452 char_u * | |
453 mac_precompose_path(decompPath, decompLen, precompLen) | |
454 char_u *decompPath; | |
455 size_t decompLen; | |
456 size_t *precompLen; | |
457 { | |
458 char_u *result = NULL; | |
459 size_t actualLen = 0; | |
460 | |
461 if (gPathConverter) | |
462 { | |
463 result = alloc(decompLen); | |
464 if (result) | |
465 { | |
466 if (TECConvertText(gPathConverter, decompPath, | |
467 decompLen, &decompLen, result, | |
468 decompLen, &actualLen) != noErr) | |
469 { | |
470 vim_free(result); | |
471 result = NULL; | |
472 } | |
473 } | |
474 } | |
475 | |
476 if (precompLen) | |
477 *precompLen = actualLen; | |
478 | |
479 return result; | |
480 } | |
481 | |
482 /* | |
483 * Converts from UTF-16 UniChars to precomposed UTF-8 | |
484 */ | |
485 char_u * | |
486 mac_utf16_to_utf8(from, fromLen, actualLen) | |
487 UniChar *from; | |
488 size_t fromLen; | |
489 size_t *actualLen; | |
490 { | |
491 ByteCount utf8_len; | |
492 ByteCount inputRead; | |
493 char_u *result; | |
494 | |
495 if (gUTF16ToUTF8Converter) | |
496 { | |
497 result = alloc(fromLen * 6 + 1); | |
498 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from, | |
499 fromLen, &inputRead, result, | |
500 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr) | |
501 { | |
502 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead); | |
503 utf8_len += inputRead; | |
504 } | |
505 else | |
506 { | |
507 vim_free(result); | |
508 result = NULL; | |
509 } | |
510 } | |
511 else | |
512 { | |
513 result = NULL; | |
514 } | |
515 | |
516 if (actualLen) | |
517 *actualLen = result ? utf8_len : 0; | |
518 | |
519 return result; | |
520 } | |
521 | |
522 /* | |
523 * Converts from UTF-8 to UTF-16 UniChars | |
524 */ | |
525 UniChar * | |
526 mac_utf8_to_utf16(from, fromLen, actualLen) | |
527 char_u *from; | |
528 size_t fromLen; | |
529 size_t *actualLen; | |
530 { | |
531 CFStringRef utf8_str; | |
532 CFRange convertRange; | |
533 UniChar *result = NULL; | |
534 | |
535 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen, | |
536 kCFStringEncodingUTF8, FALSE); | |
537 | |
538 if (utf8_str == NULL) { | |
539 if (actualLen) | |
540 *actualLen = 0; | |
541 return NULL; | |
542 } | |
543 | |
544 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str)); | |
545 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar)); | |
546 | |
547 CFStringGetCharacters(utf8_str, convertRange, result); | |
548 | |
549 CFRelease(utf8_str); | |
550 | |
551 if (actualLen) | |
552 *actualLen = convertRange.length * sizeof(UniChar); | |
553 | |
554 return result; | |
555 } | |
233 #endif /* FEAT_MBYTE */ | 556 #endif /* FEAT_MBYTE */ |