comparison src/memfile.c @ 2730:e0a90042318d

updated for version 7.3.143 Problem: Memfile is not tested sufficiently. Looking up blocks in a memfile is slow when there are many blocks. Solution: Add high level test and unittest. Adjust the number of hash buckets to the number of blocks. (Ivan Krasilnikov)
author Bram Moolenaar <bram@vim.org>
date Tue, 22 Mar 2011 18:10:45 +0100
parents 1a0d346695fa
children 8bd38abda314
comparison
equal deleted inserted replaced
2729:12f838be9c59 2730:e0a90042318d
82 static int mf_read __ARGS((memfile_T *, bhdr_T *)); 82 static int mf_read __ARGS((memfile_T *, bhdr_T *));
83 static int mf_write __ARGS((memfile_T *, bhdr_T *)); 83 static int mf_write __ARGS((memfile_T *, bhdr_T *));
84 static int mf_write_block __ARGS((memfile_T *mfp, bhdr_T *hp, off_t offset, unsigned size)); 84 static int mf_write_block __ARGS((memfile_T *mfp, bhdr_T *hp, off_t offset, unsigned size));
85 static int mf_trans_add __ARGS((memfile_T *, bhdr_T *)); 85 static int mf_trans_add __ARGS((memfile_T *, bhdr_T *));
86 static void mf_do_open __ARGS((memfile_T *, char_u *, int)); 86 static void mf_do_open __ARGS((memfile_T *, char_u *, int));
87 static void mf_hash_init __ARGS((mf_hashtab_T *));
88 static void mf_hash_free __ARGS((mf_hashtab_T *));
89 static void mf_hash_free_all __ARGS((mf_hashtab_T *));
90 static mf_hashitem_T *mf_hash_find __ARGS((mf_hashtab_T *, blocknr_T));
91 static void mf_hash_add_item __ARGS((mf_hashtab_T *, mf_hashitem_T *));
92 static void mf_hash_rem_item __ARGS((mf_hashtab_T *, mf_hashitem_T *));
93 static int mf_hash_grow __ARGS((mf_hashtab_T *));
87 94
88 /* 95 /*
89 * The functions for using a memfile: 96 * The functions for using a memfile:
90 * 97 *
91 * mf_open() open a new or existing memfile 98 * mf_open() open a new or existing memfile
117 mf_open(fname, flags) 124 mf_open(fname, flags)
118 char_u *fname; 125 char_u *fname;
119 int flags; 126 int flags;
120 { 127 {
121 memfile_T *mfp; 128 memfile_T *mfp;
122 int i;
123 off_t size; 129 off_t size;
124 #if defined(STATFS) && defined(UNIX) && !defined(__QNX__) 130 #if defined(STATFS) && defined(UNIX) && !defined(__QNX__)
125 # define USE_FSTATFS 131 # define USE_FSTATFS
126 struct STATFS stf; 132 struct STATFS stf;
127 #endif 133 #endif
150 mfp->mf_free_first = NULL; /* free list is empty */ 156 mfp->mf_free_first = NULL; /* free list is empty */
151 mfp->mf_used_first = NULL; /* used list is empty */ 157 mfp->mf_used_first = NULL; /* used list is empty */
152 mfp->mf_used_last = NULL; 158 mfp->mf_used_last = NULL;
153 mfp->mf_dirty = FALSE; 159 mfp->mf_dirty = FALSE;
154 mfp->mf_used_count = 0; 160 mfp->mf_used_count = 0;
155 for (i = 0; i < MEMHASHSIZE; ++i) 161 mf_hash_init(&mfp->mf_hash);
156 { 162 mf_hash_init(&mfp->mf_trans);
157 mfp->mf_hash[i] = NULL; /* hash lists are empty */
158 mfp->mf_trans[i] = NULL; /* trans lists are empty */
159 }
160 mfp->mf_page_size = MEMFILE_PAGE_SIZE; 163 mfp->mf_page_size = MEMFILE_PAGE_SIZE;
161 #ifdef FEAT_CRYPT 164 #ifdef FEAT_CRYPT
162 mfp->mf_old_key = NULL; 165 mfp->mf_old_key = NULL;
163 #endif 166 #endif
164 167
240 mf_close(mfp, del_file) 243 mf_close(mfp, del_file)
241 memfile_T *mfp; 244 memfile_T *mfp;
242 int del_file; 245 int del_file;
243 { 246 {
244 bhdr_T *hp, *nextp; 247 bhdr_T *hp, *nextp;
245 NR_TRANS *tp, *tpnext;
246 int i;
247 248
248 if (mfp == NULL) /* safety check */ 249 if (mfp == NULL) /* safety check */
249 return; 250 return;
250 if (mfp->mf_fd >= 0) 251 if (mfp->mf_fd >= 0)
251 { 252 {
261 nextp = hp->bh_next; 262 nextp = hp->bh_next;
262 mf_free_bhdr(hp); 263 mf_free_bhdr(hp);
263 } 264 }
264 while (mfp->mf_free_first != NULL) /* free entries in free list */ 265 while (mfp->mf_free_first != NULL) /* free entries in free list */
265 vim_free(mf_rem_free(mfp)); 266 vim_free(mf_rem_free(mfp));
266 for (i = 0; i < MEMHASHSIZE; ++i) /* free entries in trans lists */ 267 mf_hash_free(&mfp->mf_hash);
267 for (tp = mfp->mf_trans[i]; tp != NULL; tp = tpnext) 268 mf_hash_free_all(&mfp->mf_trans); /* free hashtable and its items */
268 {
269 tpnext = tp->nt_next;
270 vim_free(tp);
271 }
272 vim_free(mfp->mf_fname); 269 vim_free(mfp->mf_fname);
273 vim_free(mfp->mf_ffname); 270 vim_free(mfp->mf_ffname);
274 vim_free(mfp); 271 vim_free(mfp);
275 } 272 }
276 273
741 static void 738 static void
742 mf_ins_hash(mfp, hp) 739 mf_ins_hash(mfp, hp)
743 memfile_T *mfp; 740 memfile_T *mfp;
744 bhdr_T *hp; 741 bhdr_T *hp;
745 { 742 {
746 bhdr_T *hhp; 743 mf_hash_add_item(&mfp->mf_hash, (mf_hashitem_T *)hp);
747 int hash;
748
749 hash = MEMHASH(hp->bh_bnum);
750 hhp = mfp->mf_hash[hash];
751 hp->bh_hash_next = hhp;
752 hp->bh_hash_prev = NULL;
753 if (hhp != NULL)
754 hhp->bh_hash_prev = hp;
755 mfp->mf_hash[hash] = hp;
756 } 744 }
757 745
758 /* 746 /*
759 * remove block *hp from hashlist of memfile list *mfp 747 * remove block *hp from hashlist of memfile list *mfp
760 */ 748 */
761 static void 749 static void
762 mf_rem_hash(mfp, hp) 750 mf_rem_hash(mfp, hp)
763 memfile_T *mfp; 751 memfile_T *mfp;
764 bhdr_T *hp; 752 bhdr_T *hp;
765 { 753 {
766 if (hp->bh_hash_prev == NULL) 754 mf_hash_rem_item(&mfp->mf_hash, (mf_hashitem_T *)hp);
767 mfp->mf_hash[MEMHASH(hp->bh_bnum)] = hp->bh_hash_next;
768 else
769 hp->bh_hash_prev->bh_hash_next = hp->bh_hash_next;
770
771 if (hp->bh_hash_next)
772 hp->bh_hash_next->bh_hash_prev = hp->bh_hash_prev;
773 } 755 }
774 756
775 /* 757 /*
776 * look in hash lists of memfile *mfp for block header with number 'nr' 758 * look in hash lists of memfile *mfp for block header with number 'nr'
777 */ 759 */
778 static bhdr_T * 760 static bhdr_T *
779 mf_find_hash(mfp, nr) 761 mf_find_hash(mfp, nr)
780 memfile_T *mfp; 762 memfile_T *mfp;
781 blocknr_T nr; 763 blocknr_T nr;
782 { 764 {
783 bhdr_T *hp; 765 return (bhdr_T *)mf_hash_find(&mfp->mf_hash, nr);
784
785 for (hp = mfp->mf_hash[MEMHASH(nr)]; hp != NULL; hp = hp->bh_hash_next)
786 if (hp->bh_bnum == nr)
787 break;
788 return hp;
789 } 766 }
790 767
791 /* 768 /*
792 * insert block *hp in front of used list of memfile *mfp 769 * insert block *hp in front of used list of memfile *mfp
793 */ 770 */
1185 memfile_T *mfp; 1162 memfile_T *mfp;
1186 bhdr_T *hp; 1163 bhdr_T *hp;
1187 { 1164 {
1188 bhdr_T *freep; 1165 bhdr_T *freep;
1189 blocknr_T new_bnum; 1166 blocknr_T new_bnum;
1190 int hash;
1191 NR_TRANS *np; 1167 NR_TRANS *np;
1192 int page_count; 1168 int page_count;
1193 1169
1194 if (hp->bh_bnum >= 0) /* it's already positive */ 1170 if (hp->bh_bnum >= 0) /* it's already positive */
1195 return OK; 1171 return OK;
1233 1209
1234 mf_rem_hash(mfp, hp); /* remove from old hash list */ 1210 mf_rem_hash(mfp, hp); /* remove from old hash list */
1235 hp->bh_bnum = new_bnum; 1211 hp->bh_bnum = new_bnum;
1236 mf_ins_hash(mfp, hp); /* insert in new hash list */ 1212 mf_ins_hash(mfp, hp); /* insert in new hash list */
1237 1213
1238 hash = MEMHASH(np->nt_old_bnum); /* insert in trans list */ 1214 /* Insert "np" into "mf_trans" hashtable with key "np->nt_old_bnum" */
1239 np->nt_next = mfp->mf_trans[hash]; 1215 mf_hash_add_item(&mfp->mf_trans, (mf_hashitem_T *)np);
1240 mfp->mf_trans[hash] = np;
1241 if (np->nt_next != NULL)
1242 np->nt_next->nt_prev = np;
1243 np->nt_prev = NULL;
1244 1216
1245 return OK; 1217 return OK;
1246 } 1218 }
1247 1219
1248 /* 1220 /*
1253 blocknr_T 1225 blocknr_T
1254 mf_trans_del(mfp, old_nr) 1226 mf_trans_del(mfp, old_nr)
1255 memfile_T *mfp; 1227 memfile_T *mfp;
1256 blocknr_T old_nr; 1228 blocknr_T old_nr;
1257 { 1229 {
1258 int hash;
1259 NR_TRANS *np; 1230 NR_TRANS *np;
1260 blocknr_T new_bnum; 1231 blocknr_T new_bnum;
1261 1232
1262 hash = MEMHASH(old_nr); 1233 np = (NR_TRANS *)mf_hash_find(&mfp->mf_trans, old_nr);
1263 for (np = mfp->mf_trans[hash]; np != NULL; np = np->nt_next) 1234
1264 if (np->nt_old_bnum == old_nr)
1265 break;
1266 if (np == NULL) /* not found */ 1235 if (np == NULL) /* not found */
1267 return old_nr; 1236 return old_nr;
1268 1237
1269 mfp->mf_neg_count--; 1238 mfp->mf_neg_count--;
1270 new_bnum = np->nt_new_bnum; 1239 new_bnum = np->nt_new_bnum;
1271 if (np->nt_prev != NULL) /* remove entry from the trans list */ 1240
1272 np->nt_prev->nt_next = np->nt_next; 1241 /* remove entry from the trans list */
1273 else 1242 mf_hash_rem_item(&mfp->mf_trans, (mf_hashitem_T *)np);
1274 mfp->mf_trans[hash] = np->nt_next; 1243
1275 if (np->nt_next != NULL)
1276 np->nt_next->nt_prev = np->nt_prev;
1277 vim_free(np); 1244 vim_free(np);
1278 1245
1279 return new_bnum; 1246 return new_bnum;
1280 } 1247 }
1281 1248
1395 mch_copy_sec(fname, mfp->mf_fname); 1362 mch_copy_sec(fname, mfp->mf_fname);
1396 #endif 1363 #endif
1397 mch_hide(mfp->mf_fname); /* try setting the 'hidden' flag */ 1364 mch_hide(mfp->mf_fname); /* try setting the 'hidden' flag */
1398 } 1365 }
1399 } 1366 }
1367
1368 /*
1369 * Implementation of mf_hashtab_T follows.
1370 */
1371
1372 /*
1373 * The number of buckets in the hashtable is increased by a factor of
1374 * MHT_GROWTH_FACTOR when the average number of items per bucket
1375 * exceeds 2 ^ MHT_LOG_LOAD_FACTOR.
1376 */
1377 #define MHT_LOG_LOAD_FACTOR 6
1378 #define MHT_GROWTH_FACTOR 2 /* must be a power of two */
1379
1380 /*
1381 * Initialize an empty hash table.
1382 */
1383 static void
1384 mf_hash_init(mht)
1385 mf_hashtab_T *mht;
1386 {
1387 vim_memset(mht, 0, sizeof(mf_hashtab_T));
1388 mht->mht_buckets = mht->mht_small_buckets;
1389 mht->mht_mask = MHT_INIT_SIZE - 1;
1390 }
1391
1392 /*
1393 * Free the array of a hash table. Does not free the items it contains!
1394 * The hash table must not be used again without another mf_hash_init() call.
1395 */
1396 static void
1397 mf_hash_free(mht)
1398 mf_hashtab_T *mht;
1399 {
1400 if (mht->mht_buckets != mht->mht_small_buckets)
1401 vim_free(mht->mht_buckets);
1402 }
1403
1404 /*
1405 * Free the array of a hash table and all the items it contains.
1406 */
1407 static void
1408 mf_hash_free_all(mht)
1409 mf_hashtab_T *mht;
1410 {
1411 long_u idx;
1412 mf_hashitem_T *mhi;
1413 mf_hashitem_T *next;
1414
1415 for (idx = 0; idx <= mht->mht_mask; idx++)
1416 for (mhi = mht->mht_buckets[idx]; mhi != NULL; mhi = next)
1417 {
1418 next = mhi->mhi_next;
1419 vim_free(mhi);
1420 }
1421
1422 mf_hash_free(mht);
1423 }
1424
1425 /*
1426 * Find "key" in hashtable "mht".
1427 * Returns a pointer to a mf_hashitem_T or NULL if the item was not found.
1428 */
1429 static mf_hashitem_T *
1430 mf_hash_find(mht, key)
1431 mf_hashtab_T *mht;
1432 blocknr_T key;
1433 {
1434 mf_hashitem_T *mhi;
1435
1436 mhi = mht->mht_buckets[key & mht->mht_mask];
1437 while (mhi != NULL && mhi->mhi_key != key)
1438 mhi = mhi->mhi_next;
1439
1440 return mhi;
1441 }
1442
1443 /*
1444 * Add item "mhi" to hashtable "mht".
1445 * "mhi" must not be NULL.
1446 */
1447 static void
1448 mf_hash_add_item(mht, mhi)
1449 mf_hashtab_T *mht;
1450 mf_hashitem_T *mhi;
1451 {
1452 long_u idx;
1453
1454 idx = mhi->mhi_key & mht->mht_mask;
1455 mhi->mhi_next = mht->mht_buckets[idx];
1456 mhi->mhi_prev = NULL;
1457 if (mhi->mhi_next != NULL)
1458 mhi->mhi_next->mhi_prev = mhi;
1459 mht->mht_buckets[idx] = mhi;
1460
1461 mht->mht_count++;
1462
1463 /*
1464 * Grow hashtable when we have more thank 2^MHT_LOG_LOAD_FACTOR
1465 * items per bucket on average
1466 */
1467 if (mht->mht_fixed == 0
1468 && (mht->mht_count >> MHT_LOG_LOAD_FACTOR) > mht->mht_mask)
1469 {
1470 if (mf_hash_grow(mht) == FAIL)
1471 {
1472 /* stop trying to grow after first failure to allocate memory */
1473 mht->mht_fixed = 1;
1474 }
1475 }
1476 }
1477
1478 /*
1479 * Remove item "mhi" from hashtable "mht".
1480 * "mhi" must not be NULL and must have been inserted into "mht".
1481 */
1482 static void
1483 mf_hash_rem_item(mht, mhi)
1484 mf_hashtab_T *mht;
1485 mf_hashitem_T *mhi;
1486 {
1487 if (mhi->mhi_prev == NULL)
1488 mht->mht_buckets[mhi->mhi_key & mht->mht_mask] = mhi->mhi_next;
1489 else
1490 mhi->mhi_prev->mhi_next = mhi->mhi_next;
1491
1492 if (mhi->mhi_next != NULL)
1493 mhi->mhi_next->mhi_prev = mhi->mhi_prev;
1494
1495 mht->mht_count--;
1496
1497 /* We could shrink the table here, but it typically takes little memory,
1498 * so why bother? */
1499 }
1500
1501 /*
1502 * Increase number of buckets in the hashtable by MHT_GROWTH_FACTOR and
1503 * rehash items.
1504 * Returns FAIL when out of memory.
1505 */
1506 static int
1507 mf_hash_grow(mht)
1508 mf_hashtab_T *mht;
1509 {
1510 long_u i, j;
1511 int shift;
1512 mf_hashitem_T *mhi;
1513 mf_hashitem_T *tails[MHT_GROWTH_FACTOR];
1514 mf_hashitem_T **buckets;
1515 size_t size;
1516
1517 size = (mht->mht_mask + 1) * MHT_GROWTH_FACTOR * sizeof(void *);
1518 buckets = (mf_hashitem_T **)lalloc_clear(size, FALSE);
1519 if (buckets == NULL)
1520 return FAIL;
1521
1522 shift = 0;
1523 while ((mht->mht_mask >> shift) != 0)
1524 shift++;
1525
1526 for (i = 0; i <= mht->mht_mask; i++)
1527 {
1528 /*
1529 * Traverse the items in the i-th original bucket and move them into
1530 * MHT_GROWTH_FACTOR new buckets, preserving their relative order
1531 * within each new bucket. Preserving the order is important because
1532 * mf_get() tries to keep most recently used items at the front of
1533 * each bucket.
1534 *
1535 * Here we strongly rely on the fact the hashes are computed modulo
1536 * a power of two.
1537 */
1538
1539 vim_memset(tails, 0, sizeof(tails));
1540
1541 for (mhi = mht->mht_buckets[i]; mhi != NULL; mhi = mhi->mhi_next)
1542 {
1543 j = (mhi->mhi_key >> shift) & (MHT_GROWTH_FACTOR - 1);
1544 if (tails[j] == NULL)
1545 {
1546 buckets[i + (j << shift)] = mhi;
1547 tails[j] = mhi;
1548 mhi->mhi_prev = NULL;
1549 }
1550 else
1551 {
1552 tails[j]->mhi_next = mhi;
1553 mhi->mhi_prev = tails[j];
1554 tails[j] = mhi;
1555 }
1556 }
1557
1558 for (j = 0; j < MHT_GROWTH_FACTOR; j++)
1559 if (tails[j] != NULL)
1560 tails[j]->mhi_next = NULL;
1561 }
1562
1563 if (mht->mht_buckets != mht->mht_small_buckets)
1564 vim_free(mht->mht_buckets);
1565
1566 mht->mht_buckets = buckets;
1567 mht->mht_mask = (mht->mht_mask + 1) * MHT_GROWTH_FACTOR - 1;
1568
1569 return OK;
1570 }