Mercurial > vim
comparison src/eval.c @ 3320:45467d77047d v7.3.427
updated for version 7.3.427
Problem: readfile() can be slow with long lines.
Solution: Use realloc() instead of alloc(). (John Little)
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Sun, 05 Feb 2012 00:39:18 +0100 |
parents | 22a626604672 |
children | 636c2b1cdc8b |
Comparison legend: equal, deleted, inserted, replaced
3319:1343b311098d | 3320:45467d77047d |
---|---|
14323 f_readfile(argvars, rettv) | 14323 f_readfile(argvars, rettv) |
14324 typval_T *argvars; | 14324 typval_T *argvars; |
14325 typval_T *rettv; | 14325 typval_T *rettv; |
14326 { | 14326 { |
14327 int binary = FALSE; | 14327 int binary = FALSE; |
14328 int failed = FALSE; | |
14328 char_u *fname; | 14329 char_u *fname; |
14329 FILE *fd; | 14330 FILE *fd; |
14330 listitem_T *li; | 14331 char_u buf[(IOSIZE/256)*256]; /* rounded to avoid odd + 1 */ |
14331 #define FREAD_SIZE 200 /* optimized for text lines */ | 14332 int io_size = sizeof(buf); |
14332 char_u buf[FREAD_SIZE]; | 14333 int readlen; /* size of last fread() */ |
14333 int readlen; /* size of last fread() */ | 14334 char_u *prev = NULL; /* previously read bytes, if any */ |
14334 int buflen; /* nr of valid chars in buf[] */ | 14335 long prevlen = 0; /* length of data in prev */ |
14335 int filtd; /* how much in buf[] was NUL -> '\n' filtered */ | 14336 long prevsize = 0; /* size of prev buffer */ |
14336 int tolist; /* first byte in buf[] still to be put in list */ | 14337 long maxline = MAXLNUM; |
14337 int chop; /* how many CR to chop off */ | 14338 long cnt = 0; |
14338 char_u *prev = NULL; /* previously read bytes, if any */ | 14339 char_u *p; /* position in buf */ |
14339 int prevlen = 0; /* length of "prev" if not NULL */ | 14340 char_u *start; /* start of current line */ |
14340 char_u *s; | |
14341 int len; | |
14342 long maxline = MAXLNUM; | |
14343 long cnt = 0; | |
14344 | 14341 |
14345 if (argvars[1].v_type != VAR_UNKNOWN) | 14342 if (argvars[1].v_type != VAR_UNKNOWN) |
14346 { | 14343 { |
14347 if (STRCMP(get_tv_string(&argvars[1]), "b") == 0) | 14344 if (STRCMP(get_tv_string(&argvars[1]), "b") == 0) |
14348 binary = TRUE; | 14345 binary = TRUE; |
14360 { | 14357 { |
14361 EMSG2(_(e_notopen), *fname == NUL ? (char_u *)_("<empty>") : fname); | 14358 EMSG2(_(e_notopen), *fname == NUL ? (char_u *)_("<empty>") : fname); |
14362 return; | 14359 return; |
14363 } | 14360 } |
14364 | 14361 |
14365 filtd = 0; | |
14366 while (cnt < maxline || maxline < 0) | 14362 while (cnt < maxline || maxline < 0) |
14367 { | 14363 { |
14368 readlen = (int)fread(buf + filtd, 1, FREAD_SIZE - filtd, fd); | 14364 readlen = (int)fread(buf, 1, io_size, fd); |
14369 buflen = filtd + readlen; | 14365 |
14370 tolist = 0; | 14366 /* This for loop processes what was read, but is also entered at end |
14371 for ( ; filtd < buflen || readlen <= 0; ++filtd) | 14367 * of file so that either: |
14372 { | 14368 * - an incomplete line gets written |
14373 if (readlen <= 0 || buf[filtd] == '\n') | 14369 * - a "binary" file gets an empty line at the end if it ends in a |
14370 * newline. */ | |
14371 for (p = buf, start = buf; | |
14372 p < buf + readlen || (readlen <= 0 && (prevlen > 0 || binary)); | |
14373 ++p) | |
14374 { | |
14375 if (*p == '\n' || readlen <= 0) | |
14374 { | 14376 { |
14375 /* In binary mode add an empty list item when the last | 14377 listitem_T *li; |
14376 * non-empty line ends in a '\n'. */ | 14378 char_u *s = NULL; |
14377 if (!binary && readlen == 0 && filtd == 0 && prev == NULL) | 14379 long_u len = p - start; |
14378 break; | 14380 |
14379 | 14381 /* Finished a line. Remove CRs before NL. */ |
14380 /* Found end-of-line or end-of-file: add a text line to the | 14382 if (readlen > 0 && !binary) |
14381 * list. */ | 14383 { |
14382 chop = 0; | 14384 while (len > 0 && start[len - 1] == '\r') |
14383 if (!binary) | 14385 --len; |
14384 while (filtd - chop - 1 >= tolist | 14386 /* removal may cross back to the "prev" string */ |
14385 && buf[filtd - chop - 1] == '\r') | 14387 if (len == 0) |
14386 ++chop; | 14388 while (prevlen > 0 && prev[prevlen - 1] == '\r') |
14387 len = filtd - tolist - chop; | 14389 --prevlen; |
14388 if (prev == NULL) | 14390 } |
14389 s = vim_strnsave(buf + tolist, len); | 14391 if (prevlen == 0) |
14392 s = vim_strnsave(start, len); | |
14390 else | 14393 else |
14391 { | 14394 { |
14392 s = alloc((unsigned)(prevlen + len + 1)); | 14395 /* Change "prev" buffer to be the right size. This way |
14393 if (s != NULL) | 14396 * the bytes are only copied once, and very long lines are |
14397 * allocated only once. */ | |
14398 if ((s = vim_realloc(prev, prevlen + len + 1)) != NULL) | |
14394 { | 14399 { |
14395 mch_memmove(s, prev, prevlen); | 14400 mch_memmove(s + prevlen, start, len); |
14396 vim_free(prev); | |
14397 prev = NULL; | |
14398 mch_memmove(s + prevlen, buf + tolist, len); | |
14399 s[prevlen + len] = NUL; | 14401 s[prevlen + len] = NUL; |
14402 prev = NULL; /* the list will own the string */ | |
14403 prevlen = prevsize = 0; | |
14400 } | 14404 } |
14401 } | 14405 } |
14402 tolist = filtd + 1; | 14406 if (s == NULL) |
14403 | 14407 { |
14404 li = listitem_alloc(); | 14408 do_outofmem_msg((long_u) prevlen + len + 1); |
14405 if (li == NULL) | 14409 failed = TRUE; |
14410 break; | |
14411 } | |
14412 | |
14413 if ((li = listitem_alloc()) == NULL) | |
14406 { | 14414 { |
14407 vim_free(s); | 14415 vim_free(s); |
14416 failed = TRUE; | |
14408 break; | 14417 break; |
14409 } | 14418 } |
14410 li->li_tv.v_type = VAR_STRING; | 14419 li->li_tv.v_type = VAR_STRING; |
14411 li->li_tv.v_lock = 0; | 14420 li->li_tv.v_lock = 0; |
14412 li->li_tv.vval.v_string = s; | 14421 li->li_tv.vval.v_string = s; |
14413 list_append(rettv->vval.v_list, li); | 14422 list_append(rettv->vval.v_list, li); |
14414 | 14423 |
14415 if (++cnt >= maxline && maxline >= 0) | 14424 start = p + 1; /* step over newline */ |
14416 break; | 14425 if ((++cnt >= maxline && maxline >= 0) || readlen <= 0) |
14417 if (readlen <= 0) | |
14418 break; | 14426 break; |
14419 } | 14427 } |
14420 else if (buf[filtd] == NUL) | 14428 else if (*p == NUL) |
14421 buf[filtd] = '\n'; | 14429 *p = '\n'; |
14422 #ifdef FEAT_MBYTE | 14430 #ifdef FEAT_MBYTE |
14423 else if (buf[filtd] == 0xef | 14431 /* Check for utf8 "bom"; U+FEFF is encoded as EF BB BF. Do this |
14424 && enc_utf8 | 14432 * when finding the BF and check the previous two bytes. */ |
14425 && filtd + 2 < buflen | 14433 else if (*p == 0xbf && enc_utf8 && !binary) |
14426 && !binary | |
14427 && buf[filtd + 1] == 0xbb | |
14428 && buf[filtd + 2] == 0xbf) | |
14429 { | 14434 { |
14430 /* remove utf-8 byte order mark */ | 14435 /* Find the two bytes before the 0xbf. If p is at buf, or buf |
14431 mch_memmove(buf + filtd, buf + filtd + 3, buflen - filtd - 3); | 14436 * + 1, these may be in the "prev" string. */ |
14432 --filtd; | 14437 char_u back1 = p >= buf + 1 ? p[-1] |
14433 buflen -= 3; | 14438 : prevlen >= 1 ? prev[prevlen - 1] : NUL; |
14439 char_u back2 = p >= buf + 2 ? p[-2] | |
14440 : p == buf + 1 && prevlen >= 1 ? prev[prevlen - 1] | |
14441 : prevlen >= 2 ? prev[prevlen - 2] : NUL; | |
14442 | |
14443 if (back2 == 0xef && back1 == 0xbb) | |
14444 { | |
14445 char_u *dest = p - 2; | |
14446 | |
14447 /* Usually a BOM is at the beginning of a file, and so at | |
14448 * the beginning of a line; then we can just step over it. | |
14449 */ | |
14450 if (start == dest) | |
14451 start = p + 1; | |
14452 else | |
14453 { | |
14454 /* have to shuffle buf to close gap */ | |
14455 int adjust_prevlen = 0; | |
14456 | |
14457 if (dest < buf) | |
14458 { | |
14459 adjust_prevlen = buf - dest; /* must be 1 or 2 */ | |
14460 dest = buf; | |
14461 } | |
14462 if (readlen > p - buf + 1) | |
14463 mch_memmove(dest, p + 1, readlen - (p - buf) - 1); | |
14464 readlen -= 3 - adjust_prevlen; | |
14465 prevlen -= adjust_prevlen; | |
14466 p = dest - 1; | |
14467 } | |
14468 } | |
14434 } | 14469 } |
14435 #endif | 14470 #endif |
14436 } | 14471 } /* for */ |
14437 if (readlen <= 0) | 14472 |
14473 if (failed || (cnt >= maxline && maxline >= 0) || readlen <= 0) | |
14438 break; | 14474 break; |
14439 | 14475 if (start < p) |
14440 if (tolist == 0) | 14476 { |
14441 { | 14477 /* There's part of a line in buf, store it in "prev". */ |
14442 if (buflen >= FREAD_SIZE / 2) | 14478 if (p - start + prevlen >= prevsize) |
14443 { | 14479 { |
14444 /* "buf" is full, need to move text to an allocated buffer */ | 14480 /* need bigger "prev" buffer */ |
14445 if (prev == NULL) | 14481 char_u *newprev; |
14446 { | 14482 |
14447 prev = vim_strnsave(buf, buflen); | 14483 /* A common use case is ordinary text files and "prev" gets a |
14448 prevlen = buflen; | 14484 * fragment of a line, so the first allocation is made |
14449 } | 14485 * small, to avoid repeatedly 'allocing' large and |
14486 * 'reallocing' small. */ | |
14487 if (prevsize == 0) | |
14488 prevsize = p - start; | |
14450 else | 14489 else |
14451 { | 14490 { |
14452 s = alloc((unsigned)(prevlen + buflen)); | 14491 long grow50pc = (prevsize * 3) / 2; |
14453 if (s != NULL) | 14492 long growmin = (p - start) * 2 + prevlen; |
14454 { | 14493 prevsize = grow50pc > growmin ? grow50pc : growmin; |
14455 mch_memmove(s, prev, prevlen); | |
14456 mch_memmove(s + prevlen, buf, buflen); | |
14457 vim_free(prev); | |
14458 prev = s; | |
14459 prevlen += buflen; | |
14460 } | |
14461 } | 14494 } |
14462 filtd = 0; | 14495 if ((newprev = vim_realloc(prev, prevsize)) == NULL) |
14496 { | |
14497 do_outofmem_msg((long_u)prevsize); | |
14498 failed = TRUE; | |
14499 break; | |
14500 } | |
14501 prev = newprev; | |
14463 } | 14502 } |
14464 } | 14503 /* Add the line part to end of "prev". */ |
14465 else | 14504 mch_memmove(prev + prevlen, start, p - start); |
14466 { | 14505 prevlen += p - start; |
14467 mch_memmove(buf, buf + tolist, buflen - tolist); | 14506 } |
14468 filtd -= tolist; | 14507 } /* while */ |
14469 } | |
14470 } | |
14471 | 14508 |
14472 /* | 14509 /* |
14473 * For a negative line count use only the lines at the end of the file, | 14510 * For a negative line count use only the lines at the end of the file, |
14474 * free the rest. | 14511 * free the rest. |
14475 */ | 14512 */ |
14476 if (maxline < 0) | 14513 if (!failed && maxline < 0) |
14477 while (cnt > -maxline) | 14514 while (cnt > -maxline) |
14478 { | 14515 { |
14479 listitem_remove(rettv->vval.v_list, rettv->vval.v_list->lv_first); | 14516 listitem_remove(rettv->vval.v_list, rettv->vval.v_list->lv_first); |
14480 --cnt; | 14517 --cnt; |
14481 } | 14518 } |
14519 | |
14520 if (failed) | |
14521 { | |
14522 list_free(rettv->vval.v_list, TRUE); | |
14523 /* readfile doc says an empty list is returned on error */ | |
14524 rettv->vval.v_list = list_alloc(); | |
14525 } | |
14482 | 14526 |
14483 vim_free(prev); | 14527 vim_free(prev); |
14484 fclose(fd); | 14528 fclose(fd); |
14485 } | 14529 } |
14486 | 14530 |