comparison src/fileio.c @ 595:fea48f63efc8

updated for version 7.0169
author vimboss
date Tue, 13 Dec 2005 20:02:15 +0000
parents 6a91f35b354d
children ba54311bc43e
comparison
equal deleted inserted replaced
594:35cef95a6b76 595:fea48f63efc8
122 }; 122 };
123 123
124 static int buf_write_bytes __ARGS((struct bw_info *ip)); 124 static int buf_write_bytes __ARGS((struct bw_info *ip));
125 125
126 #ifdef FEAT_MBYTE 126 #ifdef FEAT_MBYTE
127 static linenr_T readfile_linenr __ARGS((linenr_T linecnt, char_u *p, char_u *endp));
127 static int ucs2bytes __ARGS((unsigned c, char_u **pp, int flags)); 128 static int ucs2bytes __ARGS((unsigned c, char_u **pp, int flags));
128 static int same_encoding __ARGS((char_u *a, char_u *b)); 129 static int same_encoding __ARGS((char_u *a, char_u *b));
129 static int get_fio_flags __ARGS((char_u *ptr)); 130 static int get_fio_flags __ARGS((char_u *ptr));
130 static char_u *check_for_bom __ARGS((char_u *p, long size, int *lenp, int flags)); 131 static char_u *check_for_bom __ARGS((char_u *p, long size, int *lenp, int flags));
131 static int make_bom __ARGS((char_u *buf, char_u *name)); 132 static int make_bom __ARGS((char_u *buf, char_u *name));
135 # ifdef MACOS_X 136 # ifdef MACOS_X
136 static int get_mac_fio_flags __ARGS((char_u *ptr)); 137 static int get_mac_fio_flags __ARGS((char_u *ptr));
137 # endif 138 # endif
138 #endif 139 #endif
139 static int move_lines __ARGS((buf_T *frombuf, buf_T *tobuf)); 140 static int move_lines __ARGS((buf_T *frombuf, buf_T *tobuf));
141
140 142
141 void 143 void
142 filemess(buf, name, s, attr) 144 filemess(buf, name, s, attr)
143 buf_T *buf; 145 buf_T *buf;
144 char_u *name; 146 char_u *name;
255 int try_dos = (vim_strchr(p_ffs, 'd') != NULL); 257 int try_dos = (vim_strchr(p_ffs, 'd') != NULL);
256 int try_unix = (vim_strchr(p_ffs, 'x') != NULL); 258 int try_unix = (vim_strchr(p_ffs, 'x') != NULL);
257 int file_rewind = FALSE; 259 int file_rewind = FALSE;
258 #ifdef FEAT_MBYTE 260 #ifdef FEAT_MBYTE
259 int can_retry; 261 int can_retry;
260 int conv_error = FALSE; /* conversion error detected */ 262 linenr_T conv_error = 0; /* line nr with conversion error */
263 linenr_T illegal_byte = 0; /* line nr with illegal byte */
261 int keep_dest_enc = FALSE; /* don't retry when char doesn't fit 264 int keep_dest_enc = FALSE; /* don't retry when char doesn't fit
262 in destination encoding */ 265 in destination encoding */
263 linenr_T illegal_byte = 0; /* line nr with illegal byte */ 266 int bad_char_behavior = BAD_REPLACE;
267 /* BAD_KEEP, BAD_DROP or character to
268 * replace with */
264 char_u *tmpname = NULL; /* name of 'charconvert' output file */ 269 char_u *tmpname = NULL; /* name of 'charconvert' output file */
265 int fio_flags = 0; 270 int fio_flags = 0;
266 char_u *fenc; /* fileencoding to use */ 271 char_u *fenc; /* fileencoding to use */
267 int fenc_alloced; /* fenc_next is in allocated memory */ 272 int fenc_alloced; /* fenc_next is in allocated memory */
268 char_u *fenc_next = NULL; /* next item in 'fencs' or NULL */ 273 char_u *fenc_next = NULL; /* next item in 'fencs' or NULL */
752 * fileformat, and after the autocommands, which may change them. 757 * fileformat, and after the autocommands, which may change them.
753 */ 758 */
754 linecnt = curbuf->b_ml.ml_line_count; 759 linecnt = curbuf->b_ml.ml_line_count;
755 760
756 #ifdef FEAT_MBYTE 761 #ifdef FEAT_MBYTE
762 /* "++bad=" argument. */
763 if (eap != NULL && eap->bad_char != 0)
764 bad_char_behavior = eap->bad_char;
765
757 /* 766 /*
758 * Decide which 'encoding' to use first. 767 * Decide which 'encoding' to use or use first.
759 */ 768 */
760 if (eap != NULL && eap->force_enc != 0) 769 if (eap != NULL && eap->force_enc != 0)
761 { 770 {
762 fenc = enc_canonize(eap->cmd + eap->force_enc); 771 fenc = enc_canonize(eap->cmd + eap->force_enc);
763 fenc_alloced = TRUE; 772 fenc_alloced = TRUE;
773 keep_dest_enc = TRUE;
764 } 774 }
765 else if (curbuf->b_p_bin) 775 else if (curbuf->b_p_bin)
766 { 776 {
767 fenc = (char_u *)""; /* binary: don't convert */ 777 fenc = (char_u *)""; /* binary: don't convert */
768 fenc_alloced = FALSE; 778 fenc_alloced = FALSE;
862 ml_delete(lnum--, FALSE); 872 ml_delete(lnum--, FALSE);
863 file_rewind = FALSE; 873 file_rewind = FALSE;
864 #ifdef FEAT_MBYTE 874 #ifdef FEAT_MBYTE
865 if (newfile) 875 if (newfile)
866 curbuf->b_p_bomb = FALSE; 876 curbuf->b_p_bomb = FALSE;
867 conv_error = FALSE; 877 conv_error = 0;
868 #endif 878 #endif
869 } 879 }
870 880
871 /* 881 /*
872 * When retrying with another "fenc" and the first time "fileformat" 882 * When retrying with another "fenc" and the first time "fileformat"
906 if (eap != NULL && eap->force_enc != 0) 916 if (eap != NULL && eap->force_enc != 0)
907 { 917 {
908 /* Conversion given with "++cc=" wasn't possible, read 918 /* Conversion given with "++cc=" wasn't possible, read
909 * without conversion. */ 919 * without conversion. */
910 notconverted = TRUE; 920 notconverted = TRUE;
911 conv_error = FALSE; 921 conv_error = 0;
912 if (fenc_alloced) 922 if (fenc_alloced)
913 vim_free(fenc); 923 vim_free(fenc);
914 fenc = (char_u *)""; 924 fenc = (char_u *)"";
915 fenc_alloced = FALSE; 925 fenc_alloced = FALSE;
916 } 926 }
1041 goto retry; 1051 goto retry;
1042 } 1052 }
1043 } 1053 }
1044 } 1054 }
1045 1055
1046 /* Set can_retry when it's possible to rewind the file and try with 1056 /* Set "can_retry" when it's possible to rewind the file and try with
1047 * another "fenc" value. It's FALSE when no other "fenc" to try, reading 1057 * another "fenc" value. It's FALSE when no other "fenc" to try, reading
1048 * stdin or "fenc" was specified with "++enc=". */ 1058 * stdin or fixed at a specific encoding. */
1049 can_retry = (*fenc != NUL && !read_stdin 1059 can_retry = (*fenc != NUL && !read_stdin && !keep_dest_enc);
1050 && (eap == NULL || eap->force_enc == 0));
1051 #endif 1060 #endif
1052 1061
1053 if (!skip_read) 1062 if (!skip_read)
1054 { 1063 {
1055 linerest = 0; 1064 linerest = 0;
1227 { 1236 {
1228 if (size < 0) /* read error */ 1237 if (size < 0) /* read error */
1229 error = TRUE; 1238 error = TRUE;
1230 #ifdef FEAT_MBYTE 1239 #ifdef FEAT_MBYTE
1231 else if (conv_restlen > 0) 1240 else if (conv_restlen > 0)
1232 /* some trailing bytes unconverted */ 1241 {
1233 conv_error = TRUE; 1242 /* Reached end-of-file but some trailing bytes could
1243 * not be converted. Truncated file? */
1244 if (conv_error == 0)
1245 conv_error = linecnt;
1246 if (bad_char_behavior != BAD_DROP)
1247 {
1248 fio_flags = 0; /* don't convert this */
1249 if (bad_char_behavior == BAD_KEEP)
1250 {
1251 /* Keep the trailing bytes as-is. */
1252 size = conv_restlen;
1253 ptr -= conv_restlen;
1254 }
1255 else
1256 {
1257 /* Replace the trailing bytes with the
1258 * replacement character. */
1259 size = 1;
1260 *--ptr = bad_char_behavior;
1261 }
1262 conv_restlen = 0;
1263 }
1264 }
1234 #endif 1265 #endif
1235 } 1266 }
1236 1267
1237 #ifdef FEAT_CRYPT 1268 #ifdef FEAT_CRYPT
1238 /* 1269 /*
1347 while ((iconv(iconv_fd, (void *)&fromp, &from_size, 1378 while ((iconv(iconv_fd, (void *)&fromp, &from_size,
1348 &top, &to_size) 1379 &top, &to_size)
1349 == (size_t)-1 && ICONV_ERRNO != ICONV_EINVAL) 1380 == (size_t)-1 && ICONV_ERRNO != ICONV_EINVAL)
1350 || from_size > CONV_RESTLEN) 1381 || from_size > CONV_RESTLEN)
1351 { 1382 {
1352 if (!keep_dest_enc && can_retry) 1383 if (can_retry)
1353 goto rewind_retry; 1384 goto rewind_retry;
1354 if (!keep_dest_enc) 1385 if (conv_error == 0)
1355 conv_error = TRUE; 1386 conv_error = readfile_linenr(linecnt,
1356 1387 ptr, (char_u *)top);
1357 /* Ignore a byte and try again. */ 1388
1389 /* Deal with a bad byte and continue with the next. */
1358 ++fromp; 1390 ++fromp;
1359 --from_size; 1391 --from_size;
1360 *top++ = '?'; 1392 if (bad_char_behavior == BAD_KEEP)
1361 --to_size; 1393 {
1394 *top++ = *(fromp - 1);
1395 --to_size;
1396 }
1397 else if (bad_char_behavior != BAD_DROP)
1398 {
1399 *top++ = bad_char_behavior;
1400 --to_size;
1401 }
1362 } 1402 }
1363 1403
1364 if (from_size > 0) 1404 if (from_size > 0)
1365 { 1405 {
1366 /* Some remaining characters, keep them for the next 1406 /* Some remaining characters, keep them for the next
1377 # endif 1417 # endif
1378 1418
1379 # ifdef WIN3264 1419 # ifdef WIN3264
1380 if (fio_flags & FIO_CODEPAGE) 1420 if (fio_flags & FIO_CODEPAGE)
1381 { 1421 {
1422 char_u *src, *dst;
1423 int u8c;
1424 WCHAR ucs2buf[3];
1425 int ucs2len;
1426 int codepage = FIO_GET_CP(fio_flags);
1427 int bytelen;
1428 int found_bad;
1429 char replstr[2];
1430
1382 /* 1431 /*
1383 * Conversion from an MS-Windows codepage or UTF-8 to UTF-8 or 1432 * Conversion from an MS-Windows codepage or UTF-8 to UTF-8 or
1384 * a codepage, using standard MS-Windows functions. 1433 * a codepage, using standard MS-Windows functions. This
1385 * 1. find out how many ucs-2 characters there are. 1434 * requires two steps:
1386 * 2. convert from 'fileencoding' to ucs-2 1435 * 1. convert from 'fileencoding' to ucs-2
1387 * 3. convert from ucs-2 to 'encoding' 1436 * 2. convert from ucs-2 to 'encoding'
1437 *
1438 * Because there may be illegal bytes AND an incomplete byte
1439 * sequence at the end, we may have to do the conversion one
1440 * character at a time to get it right.
1388 */ 1441 */
1389 char_u *ucsp; 1442
1390 size_t from_size = size; 1443 /* Replacement string for WideCharToMultiByte(). */
1391 int needed; 1444 if (bad_char_behavior > 0)
1392 char_u *p; 1445 replstr[0] = bad_char_behavior;
1393 int u8c; 1446 else
1447 replstr[0] = '?';
1448 replstr[1] = NUL;
1394 1449
1395 /* 1450 /*
1396 * 1. find out how many ucs-2 characters there are. 1451 * Move the bytes to the end of the buffer, so that we have
1452 * room to put the result at the start.
1397 */ 1453 */
1454 src = ptr + real_size - size;
1455 mch_memmove(src, ptr, size);
1456
1457 /*
1458 * Do the conversion.
1459 */
1460 dst = ptr;
1461 size = size;
1462 while (size > 0)
1463 {
1464 found_bad = FALSE;
1465
1398 # ifdef CP_UTF8 /* VC 4.1 doesn't define CP_UTF8 */ 1466 # ifdef CP_UTF8 /* VC 4.1 doesn't define CP_UTF8 */
1399 if (FIO_GET_CP(fio_flags) == CP_UTF8) 1467 if (codepage == CP_UTF8)
1468 {
1469 /* Handle CP_UTF8 input ourselves to be able to handle
1470 * trailing bytes properly.
1471 * Get one UTF-8 character from src. */
1472 bytelen = utf_ptr2len_len(src, size);
1473 if (bytelen > size)
1474 {
1475 /* Only got some bytes of a character. Normally
1476 * it's put in "conv_rest", but if it's too long
1477 * deal with it as if they were illegal bytes. */
1478 if (bytelen <= CONV_RESTLEN)
1479 break;
1480
1481 /* weird overlong byte sequence */
1482 bytelen = size;
1483 found_bad = TRUE;
1484 }
1485 else
1486 {
1487 u8c = utf_ptr2char(src);
1488 if (u8c > 0xffff)
1489 found_bad = TRUE;
1490 ucs2buf[0] = u8c;
1491 ucs2len = 1;
1492 }
1493 }
1494 else
1495 # endif
1496 {
1497 /* We don't know how long the byte sequence is, try
1498 * from one to three bytes. */
1499 for (bytelen = 1; bytelen <= size && bytelen <= 3;
1500 ++bytelen)
1501 {
1502 ucs2len = MultiByteToWideChar(codepage,
1503 MB_ERR_INVALID_CHARS,
1504 (LPCSTR)src, bytelen,
1505 ucs2buf, 3);
1506 if (ucs2len > 0)
1507 break;
1508 }
1509 if (ucs2len == 0)
1510 {
1511 /* If we have only one byte then it's probably an
1512 * incomplete byte sequence. Otherwise discard
1513 * one byte as a bad character. */
1514 if (size == 1)
1515 break;
1516 found_bad = TRUE;
1517 bytelen = 1;
1518 }
1519 }
1520
1521 if (!found_bad)
1522 {
1523 int i;
1524
1525 /* Convert "ucs2buf[ucs2len]" to 'enc' in "dst". */
1526 if (enc_utf8)
1527 {
1528 /* From UCS-2 to UTF-8. Cannot fail. */
1529 for (i = 0; i < ucs2len; ++i)
1530 dst += utf_char2bytes(ucs2buf[i], dst);
1531 }
1532 else
1533 {
1534 BOOL bad = FALSE;
1535 int dstlen;
1536
1537 /* From UCS-2 to "enc_codepage". If the
1538 * conversion uses the default character "?",
1539 * the data doesn't fit in this encoding. */
1540 dstlen = WideCharToMultiByte(enc_codepage, 0,
1541 (LPCWSTR)ucs2buf, ucs2len,
1542 (LPSTR)dst, (src - dst),
1543 replstr, &bad);
1544 if (bad)
1545 found_bad = TRUE;
1546 else
1547 dst += dstlen;
1548 }
1549 }
1550
1551 if (found_bad)
1552 {
1553 /* Deal with bytes we can't convert. */
1554 if (can_retry)
1555 goto rewind_retry;
1556 if (conv_error == 0)
1557 conv_error = readfile_linenr(linecnt, ptr, dst);
1558 if (bad_char_behavior != BAD_DROP)
1559 {
1560 if (bad_char_behavior == BAD_KEEP)
1561 {
1562 mch_memmove(dst, src, bytelen);
1563 dst += bytelen;
1564 }
1565 else
1566 *dst++ = bad_char_behavior;
1567 }
1568 }
1569
1570 src += bytelen;
1571 size -= bytelen;
1572 }
1573
1574 if (size > 0)
1400 { 1575 {
1401 int l, flen; 1576 /* An incomplete byte sequence remaining. */
1402 1577 mch_memmove(conv_rest, src, size);
1403 /* Handle CP_UTF8 ourselves to be able to handle trailing 1578 conv_restlen = size;
1404 * bytes properly. First find out the number of
1405 * characters and check for trailing bytes. */
1406 needed = 0;
1407 p = ptr;
1408 for (flen = from_size; flen > 0; flen -= l)
1409 {
1410 l = utf_ptr2len_len(p, flen);
1411 if (l > flen) /* incomplete char */
1412 {
1413 if (l > CONV_RESTLEN)
1414 /* weird overlong byte sequence */
1415 goto rewind_retry;
1416 mch_memmove(conv_rest, p, flen);
1417 conv_restlen = flen;
1418 from_size -= flen;
1419 break;
1420 }
1421 if (l == 1 && *p >= 0x80) /* illegal byte */
1422 goto rewind_retry;
1423 ++needed;
1424 p += l;
1425 }
1426 } 1579 }
1427 else 1580
1428 # endif 1581 /* The new size is equal to how much "dst" was advanced. */
1429 { 1582 size = dst - ptr;
1430 /* We can't tell if the last byte of an MBCS string is
1431 * valid and MultiByteToWideChar() returns zero if it
1432 * isn't. Try the whole string, and if that fails, bump
1433 * the last byte into conv_rest and try again. */
1434 needed = MultiByteToWideChar(FIO_GET_CP(fio_flags),
1435 MB_ERR_INVALID_CHARS, (LPCSTR)ptr, from_size,
1436 NULL, 0);
1437 if (needed == 0)
1438 {
1439 conv_rest[0] = ptr[from_size - 1];
1440 conv_restlen = 1;
1441 --from_size;
1442 needed = MultiByteToWideChar(FIO_GET_CP(fio_flags),
1443 MB_ERR_INVALID_CHARS, (LPCSTR)ptr, from_size,
1444 NULL, 0);
1445 }
1446
1447 /* If there really is a conversion error, try using another
1448 * conversion. */
1449 if (needed == 0)
1450 goto rewind_retry;
1451 }
1452
1453 /*
1454 * 2. convert from 'fileencoding' to ucs-2
1455 *
1456 * Put the result of conversion to UCS-2 at the end of the
1457 * buffer, then convert from UCS-2 to UTF-8 or "enc_codepage"
1458 * into the start of the buffer. If there is not enough space
1459 * just fail, there is probably something wrong.
1460 */
1461 ucsp = ptr + real_size - (needed * sizeof(WCHAR));
1462 if (ucsp < ptr + size)
1463 goto rewind_retry;
1464
1465 # ifdef CP_UTF8 /* VC 4.1 doesn't define CP_UTF8 */
1466 if (FIO_GET_CP(fio_flags) == CP_UTF8)
1467 {
1468 int l, flen;
1469
1470 /* Convert from utf-8 to ucs-2. */
1471 needed = 0;
1472 p = ptr;
1473 for (flen = from_size; flen > 0; flen -= l)
1474 {
1475 l = utf_ptr2len_len(p, flen);
1476 u8c = utf_ptr2char(p);
1477 ucsp[needed * 2] = (u8c & 0xff);
1478 ucsp[needed * 2 + 1] = (u8c >> 8);
1479 ++needed;
1480 p += l;
1481 }
1482 }
1483 else
1484 # endif
1485 needed = MultiByteToWideChar(FIO_GET_CP(fio_flags),
1486 MB_ERR_INVALID_CHARS, (LPCSTR)ptr,
1487 from_size, (LPWSTR)ucsp, needed);
1488
1489 /*
1490 * 3. convert from ucs-2 to 'encoding'
1491 */
1492 if (enc_utf8)
1493 {
1494 /* From UCS-2 to UTF-8. Cannot fail. */
1495 p = ptr;
1496 for (; needed > 0; --needed)
1497 {
1498 u8c = *ucsp++;
1499 u8c += (*ucsp++ << 8);
1500 p += utf_char2bytes(u8c, p);
1501 }
1502 size = p - ptr;
1503 }
1504 else
1505 {
1506 BOOL bad = FALSE;
1507
1508 /* From UCS-2 to "enc_codepage". If the conversion uses
1509 * the default character "?", the data doesn't fit in this
1510 * encoding, so fail (unless forced). */
1511 size = WideCharToMultiByte(enc_codepage, 0,
1512 (LPCWSTR)ucsp, needed,
1513 (LPSTR)ptr, real_size, "?", &bad);
1514 if (bad && !keep_dest_enc)
1515 goto rewind_retry;
1516 }
1517 } 1583 }
1518 else 1584 else
1519 # endif 1585 # endif
1520 # ifdef MACOS_X 1586 # ifdef MACOS_X
1521 if (fio_flags & FIO_MACROMAN) 1587 if (fio_flags & FIO_MACROMAN)
1626 if (p == ptr) 1692 if (p == ptr)
1627 { 1693 {
1628 /* Missing leading word. */ 1694 /* Missing leading word. */
1629 if (can_retry) 1695 if (can_retry)
1630 goto rewind_retry; 1696 goto rewind_retry;
1631 conv_error = TRUE; 1697 if (conv_error == 0)
1698 conv_error = readfile_linenr(linecnt,
1699 ptr, p);
1700 if (bad_char_behavior == BAD_DROP)
1701 continue;
1702 if (bad_char_behavior != BAD_KEEP)
1703 u8c = bad_char_behavior;
1632 } 1704 }
1633 1705
1634 /* found second word of double-word, get the first 1706 /* found second word of double-word, get the first
1635 * word and compute the resulting character */ 1707 * word and compute the resulting character */
1636 if (fio_flags & FIO_ENDIAN_L) 1708 if (fio_flags & FIO_ENDIAN_L)
1641 else 1713 else
1642 { 1714 {
1643 u16c = *--p; 1715 u16c = *--p;
1644 u16c += (*--p << 8); 1716 u16c += (*--p << 8);
1645 } 1717 }
1718 u8c = 0x10000 + ((u16c & 0x3ff) << 10)
1719 + (u8c & 0x3ff);
1720
1646 /* Check if the word is indeed a leading word. */ 1721 /* Check if the word is indeed a leading word. */
1647 if (u16c < 0xd800 || u16c > 0xdbff) 1722 if (u16c < 0xd800 || u16c > 0xdbff)
1648 { 1723 {
1649 if (can_retry) 1724 if (can_retry)
1650 goto rewind_retry; 1725 goto rewind_retry;
1651 conv_error = TRUE; 1726 if (conv_error == 0)
1727 conv_error = readfile_linenr(linecnt,
1728 ptr, p);
1729 if (bad_char_behavior == BAD_DROP)
1730 continue;
1731 if (bad_char_behavior != BAD_KEEP)
1732 u8c = bad_char_behavior;
1652 } 1733 }
1653 u8c = 0x10000 + ((u16c & 0x3ff) << 10)
1654 + (u8c & 0x3ff);
1655 } 1734 }
1656 } 1735 }
1657 else if (fio_flags & FIO_UCS4) 1736 else if (fio_flags & FIO_UCS4)
1658 { 1737 {
1659 if (fio_flags & FIO_ENDIAN_L) 1738 if (fio_flags & FIO_ENDIAN_L)
1676 if (*--p < 0x80) 1755 if (*--p < 0x80)
1677 u8c = *p; 1756 u8c = *p;
1678 else 1757 else
1679 { 1758 {
1680 len = utf_head_off(ptr, p); 1759 len = utf_head_off(ptr, p);
1760 p -= len;
1761 u8c = utf_ptr2char(p);
1681 if (len == 0) 1762 if (len == 0)
1682 { 1763 {
1683 /* Not a valid UTF-8 character, retry with 1764 /* Not a valid UTF-8 character, retry with
1684 * another fenc when possible, otherwise just 1765 * another fenc when possible, otherwise just
1685 * report the error. */ 1766 * report the error. */
1686 if (can_retry) 1767 if (can_retry)
1687 goto rewind_retry; 1768 goto rewind_retry;
1688 conv_error = TRUE; 1769 if (conv_error == 0)
1770 conv_error = readfile_linenr(linecnt,
1771 ptr, p);
1772 if (bad_char_behavior == BAD_DROP)
1773 continue;
1774 if (bad_char_behavior != BAD_KEEP)
1775 u8c = bad_char_behavior;
1689 } 1776 }
1690 p -= len;
1691 u8c = utf_ptr2char(p);
1692 } 1777 }
1693 } 1778 }
1694 if (enc_utf8) /* produce UTF-8 */ 1779 if (enc_utf8) /* produce UTF-8 */
1695 { 1780 {
1696 dest -= utf_char2len(u8c); 1781 dest -= utf_char2len(u8c);
1702 if (u8c >= 0x100) 1787 if (u8c >= 0x100)
1703 { 1788 {
1704 /* character doesn't fit in latin1, retry with 1789 /* character doesn't fit in latin1, retry with
1705 * another fenc when possible, otherwise just 1790 * another fenc when possible, otherwise just
1706 * report the error. */ 1791 * report the error. */
1707 if (can_retry && !keep_dest_enc) 1792 if (can_retry)
1708 goto rewind_retry; 1793 goto rewind_retry;
1709 *dest = 0xBF; 1794 if (conv_error == 0)
1710 conv_error = TRUE; 1795 conv_error = readfile_linenr(linecnt, ptr, p);
1796 if (bad_char_behavior == BAD_DROP)
1797 ++dest;
1798 else if (bad_char_behavior == BAD_KEEP)
1799 *dest = u8c;
1800 else if (eap != NULL && eap->bad_char != 0)
1801 *dest = bad_char_behavior;
1802 else
1803 *dest = 0xBF;
1711 } 1804 }
1712 else 1805 else
1713 *dest = u8c; 1806 *dest = u8c;
1714 } 1807 }
1715 } 1808 }
1718 line_start = dest - linerest; 1811 line_start = dest - linerest;
1719 mch_memmove(line_start, buffer, (size_t)linerest); 1812 mch_memmove(line_start, buffer, (size_t)linerest);
1720 size = (long)((ptr + real_size) - dest); 1813 size = (long)((ptr + real_size) - dest);
1721 ptr = dest; 1814 ptr = dest;
1722 } 1815 }
1723 else if (enc_utf8 && !conv_error && !curbuf->b_p_bin) 1816 else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin)
1724 { 1817 {
1725 /* Reading UTF-8: Check if the bytes are valid UTF-8. 1818 /* Reading UTF-8: Check if the bytes are valid UTF-8.
1726 * Need to start before "ptr" when part of the character was 1819 * Need to start before "ptr" when part of the character was
1727 * read in the previous read() call. */ 1820 * read in the previous read() call. */
1728 for (p = ptr - utf_head_off(buffer, ptr); p < ptr + size; ++p) 1821 for (p = ptr - utf_head_off(buffer, ptr); ; ++p)
1729 { 1822 {
1823 int todo = (ptr + size) - p;
1824 int l;
1825
1826 if (todo <= 0)
1827 break;
1730 if (*p >= 0x80) 1828 if (*p >= 0x80)
1731 { 1829 {
1732 len = utf_ptr2len(p);
1733 /* A length of 1 means it's an illegal byte. Accept 1830 /* A length of 1 means it's an illegal byte. Accept
1734 * an incomplete character at the end though, the next 1831 * an incomplete character at the end though, the next
1735 * read() will get the next bytes, we'll check it 1832 * read() will get the next bytes, we'll check it
1736 * then. */ 1833 * then. */
1737 if (len == 1) 1834 l = utf_ptr2len_len(p, todo);
1835 if (l > todo)
1738 { 1836 {
1739 p += utf_byte2len(*p) - 1; 1837 /* Incomplete byte sequence, the next read()
1838 * should get them and check the bytes. */
1839 p += todo;
1740 break; 1840 break;
1741 } 1841 }
1742 p += len - 1; 1842 if (l == 1)
1843 {
1844 /* Illegal byte. If we can try another encoding
1845 * do that. */
1846 if (can_retry)
1847 break;
1848
1849 /* Remember the first linenr with an illegal byte */
1850 if (illegal_byte == 0)
1851 illegal_byte = readfile_linenr(linecnt, ptr, p);
1852 # ifdef USE_ICONV
1853 /* When we did a conversion report an error. */
1854 if (iconv_fd != (iconv_t)-1 && conv_error == 0)
1855 conv_error = readfile_linenr(linecnt, ptr, p);
1856 # endif
1857
1858 /* Drop, keep or replace the bad byte. */
1859 if (bad_char_behavior == BAD_DROP)
1860 {
1861 mch_memmove(p, p+1, todo - 1);
1862 --p;
1863 --size;
1864 }
1865 else if (bad_char_behavior != BAD_KEEP)
1866 *p = bad_char_behavior;
1867 }
1868 p += l - 1;
1743 } 1869 }
1744 } 1870 }
1745 if (p < ptr + size) 1871 if (p < ptr + size)
1746 { 1872 {
1747 /* Detected a UTF-8 error. */ 1873 /* Detected a UTF-8 error. */
1748 if (can_retry)
1749 {
1750 rewind_retry: 1874 rewind_retry:
1751 /* Retry reading with another conversion. */ 1875 /* Retry reading with another conversion. */
1752 # if defined(FEAT_EVAL) && defined(USE_ICONV) 1876 # if defined(FEAT_EVAL) && defined(USE_ICONV)
1753 if (*p_ccv != NUL && iconv_fd != (iconv_t)-1) 1877 if (*p_ccv != NUL && iconv_fd != (iconv_t)-1)
1754 /* iconv() failed, try 'charconvert' */ 1878 /* iconv() failed, try 'charconvert' */
1755 did_iconv = TRUE; 1879 did_iconv = TRUE;
1756 else
1757 # endif
1758 /* use next item from 'fileencodings' */
1759 advance_fenc = TRUE;
1760 file_rewind = TRUE;
1761 goto retry;
1762 }
1763
1764 /* There is no alternative fenc, just report the error. */
1765 # ifdef USE_ICONV
1766 if (iconv_fd != (iconv_t)-1)
1767 conv_error = TRUE;
1768 else 1880 else
1769 # endif 1881 # endif
1770 if (illegal_byte == 0) /* Keep the first linenr */ 1882 /* use next item from 'fileencodings' */
1771 { 1883 advance_fenc = TRUE;
1772 char_u *s; 1884 file_rewind = TRUE;
1773 1885 goto retry;
1774 /* Estimate the line number. */
1775 illegal_byte = curbuf->b_ml.ml_line_count - linecnt + 1;
1776 for (s = ptr; s < p; ++s)
1777 if (*s == '\n')
1778 ++illegal_byte;
1779 }
1780 } 1886 }
1781 } 1887 }
1782 #endif 1888 #endif
1783 1889
1784 /* count the number of characters (after conversion!) */ 1890 /* count the number of characters (after conversion!) */
2157 STRCAT(IObuff, _("[crypted]")); 2263 STRCAT(IObuff, _("[crypted]"));
2158 c = TRUE; 2264 c = TRUE;
2159 } 2265 }
2160 #endif 2266 #endif
2161 #ifdef FEAT_MBYTE 2267 #ifdef FEAT_MBYTE
2162 if (conv_error) 2268 if (conv_error != 0)
2163 { 2269 {
2164 STRCAT(IObuff, _("[CONVERSION ERROR]")); 2270 sprintf((char *)IObuff + STRLEN(IObuff),
2271 _("[CONVERSION ERROR in line %ld]"), (long)conv_error);
2165 c = TRUE; 2272 c = TRUE;
2166 } 2273 }
2167 else if (illegal_byte > 0) 2274 else if (illegal_byte > 0)
2168 { 2275 {
2169 sprintf((char *)IObuff + STRLEN(IObuff), 2276 sprintf((char *)IObuff + STRLEN(IObuff),
2213 } 2320 }
2214 2321
2215 /* with errors writing the file requires ":w!" */ 2322 /* with errors writing the file requires ":w!" */
2216 if (newfile && (error 2323 if (newfile && (error
2217 #ifdef FEAT_MBYTE 2324 #ifdef FEAT_MBYTE
2218 || conv_error 2325 || conv_error != 0
2219 #endif 2326 #endif
2220 )) 2327 ))
2221 curbuf->b_p_ro = TRUE; 2328 curbuf->b_p_ro = TRUE;
2222 2329
2223 u_clearline(); /* cannot use "U" command after adding lines */ 2330 u_clearline(); /* cannot use "U" command after adding lines */
2294 2401
2295 if (recoverymode && error) 2402 if (recoverymode && error)
2296 return FAIL; 2403 return FAIL;
2297 return OK; 2404 return OK;
2298 } 2405 }
2406
2407 #ifdef FEAT_MBYTE
2408
2409 /*
2410 * From the current line count and characters read after that, estimate the
2411 * line number where we are now.
2412 * Used for error messages that include a line number.
2413 */
2414 static linenr_T
2415 readfile_linenr(linecnt, p, endp)
2416 linenr_T linecnt; /* line count before reading more bytes */
2417 char_u *p; /* start of more bytes read */
2418 char_u *endp; /* end of more bytes read */
2419 {
2420 char_u *s;
2421 linenr_T lnum;
2422
2423 lnum = curbuf->b_ml.ml_line_count - linecnt + 1;
2424 for (s = p; s < endp; ++s)
2425 if (*s == '\n')
2426 ++lnum;
2427 return lnum;
2428 }
2429 #endif
2299 2430
2300 /* 2431 /*
2301 * Fill "*eap" to force the 'fileencoding' and 'fileformat' to be equal to the 2432 * Fill "*eap" to force the 'fileencoding' and 'fileformat' to be equal to the
2302 * buffer "buf". Used for calling readfile(). 2433 * buffer "buf". Used for calling readfile().
2303 * Returns OK or FAIL. 2434 * Returns OK or FAIL.