comparison src/eval.c @ 3320:45467d77047d v7.3.427

Updated for version 7.3.427. Problem: readfile() can be slow with long lines. Solution: Use realloc() instead of alloc(). (John Little)
author Bram Moolenaar <bram@vim.org>
date Sun, 05 Feb 2012 00:39:18 +0100
parents 22a626604672
children 636c2b1cdc8b
comparison
equal deleted inserted replaced
3319:1343b311098d 3320:45467d77047d
14323 f_readfile(argvars, rettv) 14323 f_readfile(argvars, rettv)
14324 typval_T *argvars; 14324 typval_T *argvars;
14325 typval_T *rettv; 14325 typval_T *rettv;
14326 { 14326 {
14327 int binary = FALSE; 14327 int binary = FALSE;
14328 int failed = FALSE;
14328 char_u *fname; 14329 char_u *fname;
14329 FILE *fd; 14330 FILE *fd;
14330 listitem_T *li; 14331 char_u buf[(IOSIZE/256)*256]; /* rounded to avoid odd + 1 */
14331 #define FREAD_SIZE 200 /* optimized for text lines */ 14332 int io_size = sizeof(buf);
14332 char_u buf[FREAD_SIZE]; 14333 int readlen; /* size of last fread() */
14333 int readlen; /* size of last fread() */ 14334 char_u *prev = NULL; /* previously read bytes, if any */
14334 int buflen; /* nr of valid chars in buf[] */ 14335 long prevlen = 0; /* length of data in prev */
14335 int filtd; /* how much in buf[] was NUL -> '\n' filtered */ 14336 long prevsize = 0; /* size of prev buffer */
14336 int tolist; /* first byte in buf[] still to be put in list */ 14337 long maxline = MAXLNUM;
14337 int chop; /* how many CR to chop off */ 14338 long cnt = 0;
14338 char_u *prev = NULL; /* previously read bytes, if any */ 14339 char_u *p; /* position in buf */
14339 int prevlen = 0; /* length of "prev" if not NULL */ 14340 char_u *start; /* start of current line */
14340 char_u *s;
14341 int len;
14342 long maxline = MAXLNUM;
14343 long cnt = 0;
14344 14341
14345 if (argvars[1].v_type != VAR_UNKNOWN) 14342 if (argvars[1].v_type != VAR_UNKNOWN)
14346 { 14343 {
14347 if (STRCMP(get_tv_string(&argvars[1]), "b") == 0) 14344 if (STRCMP(get_tv_string(&argvars[1]), "b") == 0)
14348 binary = TRUE; 14345 binary = TRUE;
14360 { 14357 {
14361 EMSG2(_(e_notopen), *fname == NUL ? (char_u *)_("<empty>") : fname); 14358 EMSG2(_(e_notopen), *fname == NUL ? (char_u *)_("<empty>") : fname);
14362 return; 14359 return;
14363 } 14360 }
14364 14361
14365 filtd = 0;
14366 while (cnt < maxline || maxline < 0) 14362 while (cnt < maxline || maxline < 0)
14367 { 14363 {
14368 readlen = (int)fread(buf + filtd, 1, FREAD_SIZE - filtd, fd); 14364 readlen = (int)fread(buf, 1, io_size, fd);
14369 buflen = filtd + readlen; 14365
14370 tolist = 0; 14366 /* This for loop processes what was read, but is also entered at end
14371 for ( ; filtd < buflen || readlen <= 0; ++filtd) 14367 * of file so that either:
14372 { 14368 * - an incomplete line gets written
14373 if (readlen <= 0 || buf[filtd] == '\n') 14369 * - a "binary" file gets an empty line at the end if it ends in a
14370 * newline. */
14371 for (p = buf, start = buf;
14372 p < buf + readlen || (readlen <= 0 && (prevlen > 0 || binary));
14373 ++p)
14374 {
14375 if (*p == '\n' || readlen <= 0)
14374 { 14376 {
14375 /* In binary mode add an empty list item when the last 14377 listitem_T *li;
14376 * non-empty line ends in a '\n'. */ 14378 char_u *s = NULL;
14377 if (!binary && readlen == 0 && filtd == 0 && prev == NULL) 14379 long_u len = p - start;
14378 break; 14380
14379 14381 /* Finished a line. Remove CRs before NL. */
14380 /* Found end-of-line or end-of-file: add a text line to the 14382 if (readlen > 0 && !binary)
14381 * list. */ 14383 {
14382 chop = 0; 14384 while (len > 0 && start[len - 1] == '\r')
14383 if (!binary) 14385 --len;
14384 while (filtd - chop - 1 >= tolist 14386 /* removal may cross back to the "prev" string */
14385 && buf[filtd - chop - 1] == '\r') 14387 if (len == 0)
14386 ++chop; 14388 while (prevlen > 0 && prev[prevlen - 1] == '\r')
14387 len = filtd - tolist - chop; 14389 --prevlen;
14388 if (prev == NULL) 14390 }
14389 s = vim_strnsave(buf + tolist, len); 14391 if (prevlen == 0)
14392 s = vim_strnsave(start, len);
14390 else 14393 else
14391 { 14394 {
14392 s = alloc((unsigned)(prevlen + len + 1)); 14395 /* Change "prev" buffer to be the right size. This way
14393 if (s != NULL) 14396 * the bytes are only copied once, and very long lines are
14397 * allocated only once. */
14398 if ((s = vim_realloc(prev, prevlen + len + 1)) != NULL)
14394 { 14399 {
14395 mch_memmove(s, prev, prevlen); 14400 mch_memmove(s + prevlen, start, len);
14396 vim_free(prev);
14397 prev = NULL;
14398 mch_memmove(s + prevlen, buf + tolist, len);
14399 s[prevlen + len] = NUL; 14401 s[prevlen + len] = NUL;
14402 prev = NULL; /* the list will own the string */
14403 prevlen = prevsize = 0;
14400 } 14404 }
14401 } 14405 }
14402 tolist = filtd + 1; 14406 if (s == NULL)
14403 14407 {
14404 li = listitem_alloc(); 14408 do_outofmem_msg((long_u) prevlen + len + 1);
14405 if (li == NULL) 14409 failed = TRUE;
14410 break;
14411 }
14412
14413 if ((li = listitem_alloc()) == NULL)
14406 { 14414 {
14407 vim_free(s); 14415 vim_free(s);
14416 failed = TRUE;
14408 break; 14417 break;
14409 } 14418 }
14410 li->li_tv.v_type = VAR_STRING; 14419 li->li_tv.v_type = VAR_STRING;
14411 li->li_tv.v_lock = 0; 14420 li->li_tv.v_lock = 0;
14412 li->li_tv.vval.v_string = s; 14421 li->li_tv.vval.v_string = s;
14413 list_append(rettv->vval.v_list, li); 14422 list_append(rettv->vval.v_list, li);
14414 14423
14415 if (++cnt >= maxline && maxline >= 0) 14424 start = p + 1; /* step over newline */
14416 break; 14425 if ((++cnt >= maxline && maxline >= 0) || readlen <= 0)
14417 if (readlen <= 0)
14418 break; 14426 break;
14419 } 14427 }
14420 else if (buf[filtd] == NUL) 14428 else if (*p == NUL)
14421 buf[filtd] = '\n'; 14429 *p = '\n';
14422 #ifdef FEAT_MBYTE 14430 #ifdef FEAT_MBYTE
14423 else if (buf[filtd] == 0xef 14431 /* Check for utf8 "bom"; U+FEFF is encoded as EF BB BF. Do this
14424 && enc_utf8 14432 * when finding the BF and check the previous two bytes. */
14425 && filtd + 2 < buflen 14433 else if (*p == 0xbf && enc_utf8 && !binary)
14426 && !binary
14427 && buf[filtd + 1] == 0xbb
14428 && buf[filtd + 2] == 0xbf)
14429 { 14434 {
14430 /* remove utf-8 byte order mark */ 14435 /* Find the two bytes before the 0xbf. If p is at buf, or buf
14431 mch_memmove(buf + filtd, buf + filtd + 3, buflen - filtd - 3); 14436 * + 1, these may be in the "prev" string. */
14432 --filtd; 14437 char_u back1 = p >= buf + 1 ? p[-1]
14433 buflen -= 3; 14438 : prevlen >= 1 ? prev[prevlen - 1] : NUL;
14439 char_u back2 = p >= buf + 2 ? p[-2]
14440 : p == buf + 1 && prevlen >= 1 ? prev[prevlen - 1]
14441 : prevlen >= 2 ? prev[prevlen - 2] : NUL;
14442
14443 if (back2 == 0xef && back1 == 0xbb)
14444 {
14445 char_u *dest = p - 2;
14446
14447 /* Usually a BOM is at the beginning of a file, and so at
14448 * the beginning of a line; then we can just step over it.
14449 */
14450 if (start == dest)
14451 start = p + 1;
14452 else
14453 {
14454 /* have to shuffle buf to close gap */
14455 int adjust_prevlen = 0;
14456
14457 if (dest < buf)
14458 {
14459 adjust_prevlen = buf - dest; /* must be 1 or 2 */
14460 dest = buf;
14461 }
14462 if (readlen > p - buf + 1)
14463 mch_memmove(dest, p + 1, readlen - (p - buf) - 1);
14464 readlen -= 3 - adjust_prevlen;
14465 prevlen -= adjust_prevlen;
14466 p = dest - 1;
14467 }
14468 }
14434 } 14469 }
14435 #endif 14470 #endif
14436 } 14471 } /* for */
14437 if (readlen <= 0) 14472
14473 if (failed || (cnt >= maxline && maxline >= 0) || readlen <= 0)
14438 break; 14474 break;
14439 14475 if (start < p)
14440 if (tolist == 0) 14476 {
14441 { 14477 /* There's part of a line in buf, store it in "prev". */
14442 if (buflen >= FREAD_SIZE / 2) 14478 if (p - start + prevlen >= prevsize)
14443 { 14479 {
14444 /* "buf" is full, need to move text to an allocated buffer */ 14480 /* need bigger "prev" buffer */
14445 if (prev == NULL) 14481 char_u *newprev;
14446 { 14482
14447 prev = vim_strnsave(buf, buflen); 14483 /* A common use case is ordinary text files and "prev" gets a
14448 prevlen = buflen; 14484 * fragment of a line, so the first allocation is made
14449 } 14485 * small, to avoid repeatedly 'allocing' large and
14486 * 'reallocing' small. */
14487 if (prevsize == 0)
14488 prevsize = p - start;
14450 else 14489 else
14451 { 14490 {
14452 s = alloc((unsigned)(prevlen + buflen)); 14491 long grow50pc = (prevsize * 3) / 2;
14453 if (s != NULL) 14492 long growmin = (p - start) * 2 + prevlen;
14454 { 14493 prevsize = grow50pc > growmin ? grow50pc : growmin;
14455 mch_memmove(s, prev, prevlen);
14456 mch_memmove(s + prevlen, buf, buflen);
14457 vim_free(prev);
14458 prev = s;
14459 prevlen += buflen;
14460 }
14461 } 14494 }
14462 filtd = 0; 14495 if ((newprev = vim_realloc(prev, prevsize)) == NULL)
14496 {
14497 do_outofmem_msg((long_u)prevsize);
14498 failed = TRUE;
14499 break;
14500 }
14501 prev = newprev;
14463 } 14502 }
14464 } 14503 /* Add the line part to end of "prev". */
14465 else 14504 mch_memmove(prev + prevlen, start, p - start);
14466 { 14505 prevlen += p - start;
14467 mch_memmove(buf, buf + tolist, buflen - tolist); 14506 }
14468 filtd -= tolist; 14507 } /* while */
14469 }
14470 }
14471 14508
14472 /* 14509 /*
14473 * For a negative line count use only the lines at the end of the file, 14510 * For a negative line count use only the lines at the end of the file,
14474 * free the rest. 14511 * free the rest.
14475 */ 14512 */
14476 if (maxline < 0) 14513 if (!failed && maxline < 0)
14477 while (cnt > -maxline) 14514 while (cnt > -maxline)
14478 { 14515 {
14479 listitem_remove(rettv->vval.v_list, rettv->vval.v_list->lv_first); 14516 listitem_remove(rettv->vval.v_list, rettv->vval.v_list->lv_first);
14480 --cnt; 14517 --cnt;
14481 } 14518 }
14519
14520 if (failed)
14521 {
14522 list_free(rettv->vval.v_list, TRUE);
14523 /* readfile doc says an empty list is returned on error */
14524 rettv->vval.v_list = list_alloc();
14525 }
14482 14526
14483 vim_free(prev); 14527 vim_free(prev);
14484 fclose(fd); 14528 fclose(fd);
14485 } 14529 }
14486 14530