comparison src/os_mac_conv.c @ 168:4d9eabb1396e

updated for version 7.0051
author vimboss
date Tue, 22 Feb 2005 08:49:11 +0000
parents 4ac1dce8dd5e
children 7fd70926e2e1
comparison
equal deleted inserted replaced
167:c93c9cad9618 168:4d9eabb1396e
18 18
19 #ifdef FEAT_MBYTE 19 #ifdef FEAT_MBYTE
20 extern char_u *mac_string_convert __ARGS((char_u *ptr, int len, int *lenp, int fail_on_error, int from, int to, int *unconvlenp)); 20 extern char_u *mac_string_convert __ARGS((char_u *ptr, int len, int *lenp, int fail_on_error, int from, int to, int *unconvlenp));
21 extern int macroman2enc __ARGS((char_u *ptr, long *sizep, long real_size)); 21 extern int macroman2enc __ARGS((char_u *ptr, long *sizep, long real_size));
22 extern int enc2macroman __ARGS((char_u *from, size_t fromlen, char_u *to, int *tolenp, int maxtolen, char_u *rest, int *restlenp)); 22 extern int enc2macroman __ARGS((char_u *from, size_t fromlen, char_u *to, int *tolenp, int maxtolen, char_u *rest, int *restlenp));
23
24 extern void mac_conv_init __ARGS((void));
25 extern void mac_conv_cleanup __ARGS((void));
26 extern char_u *mac_utf16_to_enc __ARGS((UniChar *from, size_t fromLen, size_t *actualLen));
27 extern UniChar *mac_enc_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen));
28 extern CFStringRef mac_enc_to_cfstring __ARGS((char_u *from, size_t fromLen));
29 extern char_u *mac_precompose_path __ARGS((char_u *decompPath, size_t decompLen, size_t *precompLen));
30
31 static char_u *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen));
32 static UniChar *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen));
33
34 /* Converter for composing decomposed HFS+ file paths */
35 static TECObjectRef gPathConverter;
36 /* Converter used by mac_utf16_to_utf8 */
37 static TECObjectRef gUTF16ToUTF8Converter;
23 38
24 /* 39 /*
25 * A Mac version of string_convert_ext() for special cases. 40 * A Mac version of string_convert_ext() for special cases.
26 */ 41 */
27 char_u * 42 char_u *
57 72
58 if (unconvlenp != NULL) 73 if (unconvlenp != NULL)
59 *unconvlenp = 0; 74 *unconvlenp = 0;
60 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); 75 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
61 76
77 if(cfstr == NULL)
78 fprintf(stderr, "Encoding failed\n");
62 /* When conversion failed, try excluding bytes from the end, helps when 79 /* When conversion failed, try excluding bytes from the end, helps when
63 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid 80 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid
64 * looping a long time when there really is something unconvertable. */ 81 * looping a long time when there really is something unconvertable. */
65 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6) 82 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6)
66 { 83 {
68 ++*unconvlenp; 85 ++*unconvlenp;
69 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); 86 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
70 } 87 }
71 if (cfstr == NULL) 88 if (cfstr == NULL)
72 return NULL; 89 return NULL;
90
73 if (to == kCFStringEncodingUTF8) 91 if (to == kCFStringEncodingUTF8)
74 buflen = len * 6 + 1; 92 buflen = len * 6 + 1;
75 else 93 else
76 buflen = len + 1; 94 buflen = len + 1;
77 retval = alloc(buflen); 95 retval = alloc(buflen);
78 if (retval == NULL) 96 if (retval == NULL)
79 { 97 {
80 CFRelease(cfstr); 98 CFRelease(cfstr);
81 return NULL; 99 return NULL;
82 } 100 }
101
102 #if 0
103 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr));
104 /* Determine output buffer size */
105 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen);
106 retval = (buflen > 0) ? alloc(buflen) : NULL;
107 if (retval == NULL) {
108 CFRelease(cfstr);
109 return NULL;
110 }
111
112 if (lenp)
113 *lenp = buflen / sizeof(char_u);
114
115 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL))
116 #endif
83 if (!CFStringGetCString(cfstr, retval, buflen, to)) 117 if (!CFStringGetCString(cfstr, retval, buflen, to))
84 { 118 {
85 CFRelease(cfstr); 119 CFRelease(cfstr);
86 if (fail_on_error) 120 if (fail_on_error)
87 { 121 {
88 vim_free(retval); 122 vim_free(retval);
89 return NULL; 123 return NULL;
90 } 124 }
91 125
126 fprintf(stderr, "Trying char-by-char conversion...\n");
92 /* conversion failed for the whole string, but maybe it will work 127 /* conversion failed for the whole string, but maybe it will work
93 * for each character */ 128 * for each character */
94 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;) 129 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;)
95 { 130 {
96 if (from == kCFStringEncodingUTF8) 131 if (from == kCFStringEncodingUTF8)
126 return retval; 161 return retval;
127 } 162 }
128 CFRelease(cfstr); 163 CFRelease(cfstr);
129 if (lenp != NULL) 164 if (lenp != NULL)
130 *lenp = strlen(retval); 165 *lenp = strlen(retval);
166
131 return retval; 167 return retval;
132 } 168 }
133 169
134 /* 170 /*
135 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using 171 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
228 CFRelease(cfstr); 264 CFRelease(cfstr);
229 *tolenp = l; 265 *tolenp = l;
230 return OK; 266 return OK;
231 } 267 }
232 268
269 /*
270 * Initializes text converters
271 */
272 void
273 mac_conv_init()
274 {
275 TextEncoding utf8_encoding;
276 TextEncoding utf8_hfsplus_encoding;
277 TextEncoding utf8_canon_encoding;
278 TextEncoding utf16_encoding;
279
280 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
281 kTextEncodingDefaultVariant, kUnicodeUTF8Format);
282 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
283 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format);
284 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
285 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format);
286 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
287 kTextEncodingDefaultVariant, kUnicode16BitFormat);
288
289 if (TECCreateConverter(&gPathConverter, utf8_encoding,
290 utf8_hfsplus_encoding) != noErr)
291 gPathConverter = NULL;
292
293 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
294 utf8_canon_encoding) != noErr)
295 gUTF16ToUTF8Converter = NULL;
296 }
297
298 /*
299 * Destroys text converters
300 */
301 void
302 mac_conv_cleanup()
303 {
304 if (gUTF16ToUTF8Converter)
305 {
306 TECDisposeConverter(gUTF16ToUTF8Converter);
307 gUTF16ToUTF8Converter = NULL;
308 }
309
310 if (gPathConverter)
311 {
312 TECDisposeConverter(gPathConverter);
313 gPathConverter = NULL;
314 }
315 }
316
317 /*
318 * Conversion from UTF-16 UniChars to 'encoding'
319 */
320 char_u *
321 mac_utf16_to_enc(from, fromLen, actualLen)
322 UniChar *from;
323 size_t fromLen;
324 size_t *actualLen;
325 {
326 /* Following code borrows somewhat from os_mswin.c */
327 vimconv_T conv;
328 size_t utf8_len;
329 char_u *utf8_str;
330 char_u *result = NULL;
331
332 /* Convert to utf-8 first, works better with iconv */
333 utf8_len = 0;
334 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len);
335
336 if (utf8_str)
337 {
338 /* We might be called before we have p_enc set up. */
339 conv.vc_type = CONV_NONE;
340
341 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
342 * internal unicode is always utf-8) so don't convert in such cases */
343
344 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0)
345 convert_setup(&conv, (char_u *)"utf-8",
346 p_enc? p_enc: (char_u *)"macroman");
347 if (conv.vc_type == CONV_NONE)
348 {
349 /* p_enc is utf-8, so we're done. */
350 result = utf8_str;
351 }
352 else
353 {
354 result = string_convert(&conv, utf8_str, (int *)&utf8_len);
355 vim_free(utf8_str);
356 }
357
358 convert_setup(&conv, NULL, NULL);
359
360 if (actualLen)
361 *actualLen = utf8_len;
362 }
363 else if (actualLen)
364 *actualLen = 0;
365
366 return result;
367 }
368
369 /*
370 * Conversion from 'encoding' to UTF-16 UniChars
371 */
372 UniChar *
373 mac_enc_to_utf16(from, fromLen, actualLen)
374 char_u *from;
375 size_t fromLen;
376 size_t *actualLen;
377 {
378 /* Following code borrows somewhat from os_mswin.c */
379 vimconv_T conv;
380 size_t utf8_len;
381 char_u *utf8_str;
382 UniChar *result = NULL;
383 Boolean should_free_utf8 = FALSE;
384
385 do
386 {
387 /* Use MacRoman by default, we might be called before we have p_enc
388 * set up. Convert to utf-8 first, works better with iconv(). Does
389 * nothing if 'encoding' is "utf-8". */
390 conv.vc_type = CONV_NONE;
391 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 &&
392 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman",
393 (char_u *)"utf-8") == FAIL)
394 break;
395
396 if (conv.vc_type != CONV_NONE)
397 {
398 utf8_len = fromLen;
399 utf8_str = string_convert(&conv, from, (int *)&utf8_len);
400 should_free_utf8 = TRUE;
401 }
402 else
403 {
404 utf8_str = from;
405 utf8_len = fromLen;
406 }
407
408 if (utf8_str == NULL)
409 break;
410
411 convert_setup(&conv, NULL, NULL);
412
413 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen);
414
415 if (should_free_utf8)
416 vim_free(utf8_str);
417 return result;
418 }
419 while (0);
420
421 if (actualLen)
422 *actualLen = 0;
423
424 return result;
425 }
426
427 /*
428 * Converts from UTF-16 UniChars to CFString
429 */
430 CFStringRef
431 mac_enc_to_cfstring(from, fromLen)
432 char_u *from;
433 size_t fromLen;
434 {
435 UniChar *utf16_str;
436 size_t utf16_len;
437 CFStringRef result = NULL;
438
439 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len);
440 if (utf16_str)
441 {
442 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar));
443 vim_free(utf16_str);
444 }
445
446 return result;
447 }
448
449 /*
450 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
451 */
452 char_u *
453 mac_precompose_path(decompPath, decompLen, precompLen)
454 char_u *decompPath;
455 size_t decompLen;
456 size_t *precompLen;
457 {
458 char_u *result = NULL;
459 size_t actualLen = 0;
460
461 if (gPathConverter)
462 {
463 result = alloc(decompLen);
464 if (result)
465 {
466 if (TECConvertText(gPathConverter, decompPath,
467 decompLen, &decompLen, result,
468 decompLen, &actualLen) != noErr)
469 {
470 vim_free(result);
471 result = NULL;
472 }
473 }
474 }
475
476 if (precompLen)
477 *precompLen = actualLen;
478
479 return result;
480 }
481
482 /*
483 * Converts from UTF-16 UniChars to precomposed UTF-8
484 */
485 char_u *
486 mac_utf16_to_utf8(from, fromLen, actualLen)
487 UniChar *from;
488 size_t fromLen;
489 size_t *actualLen;
490 {
491 ByteCount utf8_len;
492 ByteCount inputRead;
493 char_u *result;
494
495 if (gUTF16ToUTF8Converter)
496 {
497 result = alloc(fromLen * 6 + 1);
498 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from,
499 fromLen, &inputRead, result,
500 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr)
501 {
502 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead);
503 utf8_len += inputRead;
504 }
505 else
506 {
507 vim_free(result);
508 result = NULL;
509 }
510 }
511 else
512 {
513 result = NULL;
514 }
515
516 if (actualLen)
517 *actualLen = result ? utf8_len : 0;
518
519 return result;
520 }
521
522 /*
523 * Converts from UTF-8 to UTF-16 UniChars
524 */
525 UniChar *
526 mac_utf8_to_utf16(from, fromLen, actualLen)
527 char_u *from;
528 size_t fromLen;
529 size_t *actualLen;
530 {
531 CFStringRef utf8_str;
532 CFRange convertRange;
533 UniChar *result = NULL;
534
535 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen,
536 kCFStringEncodingUTF8, FALSE);
537
538 if (utf8_str == NULL) {
539 if (actualLen)
540 *actualLen = 0;
541 return NULL;
542 }
543
544 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str));
545 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar));
546
547 CFStringGetCharacters(utf8_str, convertRange, result);
548
549 CFRelease(utf8_str);
550
551 if (actualLen)
552 *actualLen = convertRange.length * sizeof(UniChar);
553
554 return result;
555 }
233 #endif /* FEAT_MBYTE */ 556 #endif /* FEAT_MBYTE */