comparison src/fileio.c @ 1597:80b9076e9e17 v7.1.310

updated for version 7.1-310
author vimboss
date Wed, 04 Jun 2008 17:37:34 +0000
parents ebe059f69b9e
children 73fe8baea242
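comparison (side by side: old revision on the left, new revision on the right; a row with a single line number exists on one side only)
legend: equal, deleted, inserted, replaced
left: 1596:5898cc36322e    right: 1597:80b9076e9e17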
1286 if (size < 0) /* read error */ 1286 if (size < 0) /* read error */
1287 error = TRUE; 1287 error = TRUE;
1288 #ifdef FEAT_MBYTE 1288 #ifdef FEAT_MBYTE
1289 else if (conv_restlen > 0) 1289 else if (conv_restlen > 0)
1290 { 1290 {
1291 /* Reached end-of-file but some trailing bytes could 1291 /*
1292 * not be converted. Truncated file? */ 1292 * Reached end-of-file but some trailing bytes could
1293 if (conv_error == 0) 1293 * not be converted. Truncated file?
1294 conv_error = linecnt; 1294 */
1295 if (bad_char_behavior != BAD_DROP) 1295
1296 /* When we did a conversion report an error. */
1297 if (fio_flags != 0
1298 # ifdef USE_ICONV
1299 || iconv_fd != (iconv_t)-1
1300 # endif
1301 )
1296 { 1302 {
1303 if (conv_error == 0)
1304 conv_error = curbuf->b_ml.ml_line_count
1305 - linecnt + 1;
1306 }
1307 /* Remember the first linenr with an illegal byte */
1308 else if (illegal_byte == 0)
1309 illegal_byte = curbuf->b_ml.ml_line_count
1310 - linecnt + 1;
1311 if (bad_char_behavior == BAD_DROP)
1312 {
1313 *(ptr - conv_restlen) = NUL;
1314 conv_restlen = 0;
1315 }
1316 else
1317 {
1318 /* Replace the trailing bytes with the replacement
1319 * character if we were converting; if we weren't,
1320 * leave the UTF8 checking code to do it, as it
1321 * works slightly differently. */
1322 if (bad_char_behavior != BAD_KEEP && (fio_flags != 0
1323 # ifdef USE_ICONV
1324 || iconv_fd != (iconv_t)-1
1325 # endif
1326 ))
1327 {
1328 while (conv_restlen > 0)
1329 {
1330 *(--ptr) = bad_char_behavior;
1331 --conv_restlen;
1332 }
1333 }
1297 fio_flags = 0; /* don't convert this */ 1334 fio_flags = 0; /* don't convert this */
1298 # ifdef USE_ICONV 1335 # ifdef USE_ICONV
1299 if (iconv_fd != (iconv_t)-1) 1336 if (iconv_fd != (iconv_t)-1)
1300 { 1337 {
1301 iconv_close(iconv_fd); 1338 iconv_close(iconv_fd);
1302 iconv_fd = (iconv_t)-1; 1339 iconv_fd = (iconv_t)-1;
1303 } 1340 }
1304 # endif 1341 # endif
1305 if (bad_char_behavior == BAD_KEEP)
1306 {
1307 /* Keep the trailing bytes as-is. */
1308 size = conv_restlen;
1309 ptr -= conv_restlen;
1310 }
1311 else
1312 {
1313 /* Replace the trailing bytes with the
1314 * replacement character. */
1315 size = 1;
1316 *--ptr = bad_char_behavior;
1317 }
1318 conv_restlen = 0;
1319 } 1342 }
1320 } 1343 }
1321 #endif 1344 #endif
1322 } 1345 }
1323 1346
1395 /* retry reading without getting new bytes or rewinding */ 1418 /* retry reading without getting new bytes or rewinding */
1396 skip_read = TRUE; 1419 skip_read = TRUE;
1397 goto retry; 1420 goto retry;
1398 } 1421 }
1399 } 1422 }
1423
1424 /* Include not converted bytes. */
1425 ptr -= conv_restlen;
1426 size += conv_restlen;
1427 conv_restlen = 0;
1400 #endif 1428 #endif
1401 /* 1429 /*
1402 * Break here for a read error or end-of-file. 1430 * Break here for a read error or end-of-file.
1403 */ 1431 */
1404 if (size <= 0) 1432 if (size <= 0)
1405 break; 1433 break;
1406 1434
1407 #ifdef FEAT_MBYTE 1435 #ifdef FEAT_MBYTE
1408
1409 /* Include not converted bytes. */
1410 ptr -= conv_restlen;
1411 size += conv_restlen;
1412 conv_restlen = 0;
1413 1436
1414 # ifdef USE_ICONV 1437 # ifdef USE_ICONV
1415 if (iconv_fd != (iconv_t)-1) 1438 if (iconv_fd != (iconv_t)-1)
1416 { 1439 {
1417 /* 1440 /*
1870 line_start = dest - linerest; 1893 line_start = dest - linerest;
1871 mch_memmove(line_start, buffer, (size_t)linerest); 1894 mch_memmove(line_start, buffer, (size_t)linerest);
1872 size = (long)((ptr + real_size) - dest); 1895 size = (long)((ptr + real_size) - dest);
1873 ptr = dest; 1896 ptr = dest;
1874 } 1897 }
1875 else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin) 1898 else if (enc_utf8 && !curbuf->b_p_bin)
1876 { 1899 {
1877 /* Reading UTF-8: Check if the bytes are valid UTF-8. 1900 int incomplete_tail = FALSE;
1878 * Need to start before "ptr" when part of the character was 1901
1879 * read in the previous read() call. */ 1902 /* Reading UTF-8: Check if the bytes are valid UTF-8. */
1880 for (p = ptr - utf_head_off(buffer, ptr); ; ++p) 1903 for (p = ptr; ; ++p)
1881 { 1904 {
1882 int todo = (int)((ptr + size) - p); 1905 int todo = (int)((ptr + size) - p);
1883 int l; 1906 int l;
1884 1907
1885 if (todo <= 0) 1908 if (todo <= 0)
1889 /* A length of 1 means it's an illegal byte. Accept 1912 /* A length of 1 means it's an illegal byte. Accept
1890 * an incomplete character at the end though, the next 1913 * an incomplete character at the end though, the next
1891 * read() will get the next bytes, we'll check it 1914 * read() will get the next bytes, we'll check it
1892 * then. */ 1915 * then. */
1893 l = utf_ptr2len_len(p, todo); 1916 l = utf_ptr2len_len(p, todo);
1894 if (l > todo) 1917 if (l > todo && !incomplete_tail)
1895 { 1918 {
1896 /* Incomplete byte sequence, the next read() 1919 /* Avoid retrying with a different encoding when
1897 * should get them and check the bytes. */ 1920 * a truncated file is more likely, or attempting
1898 p += todo; 1921 * to read the rest of an incomplete sequence when
1899 break; 1922 * we have already done so. */
1923 if (p > ptr || filesize > 0)
1924 incomplete_tail = TRUE;
1925 /* Incomplete byte sequence, move it to conv_rest[]
1926 * and try to read the rest of it, unless we've
1927 * already done so. */
1928 if (p > ptr)
1929 {
1930 conv_restlen = todo;
1931 mch_memmove(conv_rest, p, conv_restlen);
1932 size -= conv_restlen;
1933 break;
1934 }
1900 } 1935 }
1901 if (l == 1) 1936 if (l == 1 || l > todo)
1902 { 1937 {
1903 /* Illegal byte. If we can try another encoding 1938 /* Illegal byte. If we can try another encoding
1904 * do that. */ 1939 * do that, unless at EOF where a truncated
1905 if (can_retry) 1940 * file is more likely than a conversion error. */
1941 if (can_retry && !incomplete_tail)
1906 break; 1942 break;
1907
1908 /* Remember the first linenr with an illegal byte */
1909 if (illegal_byte == 0)
1910 illegal_byte = readfile_linenr(linecnt, ptr, p);
1911 # ifdef USE_ICONV 1943 # ifdef USE_ICONV
1912 /* When we did a conversion report an error. */ 1944 /* When we did a conversion report an error. */
1913 if (iconv_fd != (iconv_t)-1 && conv_error == 0) 1945 if (iconv_fd != (iconv_t)-1 && conv_error == 0)
1914 conv_error = readfile_linenr(linecnt, ptr, p); 1946 conv_error = readfile_linenr(linecnt, ptr, p);
1915 # endif 1947 # endif
1948 /* Remember the first linenr with an illegal byte */
1949 if (conv_error == 0 && illegal_byte == 0)
1950 illegal_byte = readfile_linenr(linecnt, ptr, p);
1916 1951
1917 /* Drop, keep or replace the bad byte. */ 1952 /* Drop, keep or replace the bad byte. */
1918 if (bad_char_behavior == BAD_DROP) 1953 if (bad_char_behavior == BAD_DROP)
1919 { 1954 {
1920 mch_memmove(p, p+1, todo - 1); 1955 mch_memmove(p, p + 1, todo - 1);
1921 --p; 1956 --p;
1922 --size; 1957 --size;
1923 } 1958 }
1924 else if (bad_char_behavior != BAD_KEEP) 1959 else if (bad_char_behavior != BAD_KEEP)
1925 *p = bad_char_behavior; 1960 *p = bad_char_behavior;
1926 } 1961 }
1927 p += l - 1; 1962 else
1963 p += l - 1;
1928 } 1964 }
1929 } 1965 }
1930 if (p < ptr + size) 1966 if (p < ptr + size && !incomplete_tail)
1931 { 1967 {
1932 /* Detected a UTF-8 error. */ 1968 /* Detected a UTF-8 error. */
1933 rewind_retry: 1969 rewind_retry:
1934 /* Retry reading with another conversion. */ 1970 /* Retry reading with another conversion. */
1935 # if defined(FEAT_EVAL) && defined(USE_ICONV) 1971 # if defined(FEAT_EVAL) && defined(USE_ICONV)