Mercurial > vim
annotate src/hashtab.c @ 14384:6f01954c86e5
Added tag v8.1.0206 for changeset 8846b83444309722d72d62961e6eebe4df5846f2
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Mon, 23 Jul 2018 05:00:06 +0200 |
parents | 9279c939391b |
children | 55ccc2d353bd |
rev | line source |
---|---|
10042
4aead6a9b7a9
commit https://github.com/vim/vim/commit/edf3f97ae2af024708ebb4ac614227327033ca47
Christian Brabandt <cb@256bit.org>
parents:
9513
diff
changeset
|
1 /* vi:set ts=8 sts=4 sw=4 noet: |
799 | 2 * |
3 * VIM - Vi IMproved by Bram Moolenaar | |
4 * | |
5 * Do ":help uganda" in Vim to read copying and usage conditions. | |
6 * Do ":help credits" in Vim to see a list of people who contributed. | |
7 * See README.txt for an overview of the Vim source code. | |
8 */ | |
9 | |
10 /* | |
11 * hashtab.c: Handling of a hashtable with Vim-specific properties. | |
12 * | |
13 * Each item in a hashtable has a NUL terminated string key. A key can appear | |
14 * only once in the table. | |
15 * | |
16 * A hash number is computed from the key for quick lookup. When the hashes | |
17 * of two different keys point to the same entry an algorithm is used to | |
18 * iterate over other entries in the table until the right one is found. | |
19 * To make the iteration work removed keys are different from entries where a | |
20 * key was never present. | |
21 * | |
22 * The mechanism has been partly based on how Python Dictionaries are | |
23 * implemented. The algorithm is from Knuth Vol. 3, Sec. 6.4. | |
24 * | |
25 * The hashtable grows to accommodate more entries when needed. At least 1/3 | |
26 * of the entries is empty to keep the lookup efficient (at the cost of extra | |
27 * memory). | |
28 */ | |
29 | |
30 #include "vim.h" | |
31 | |
32 #if 0 | |
33 # define HT_DEBUG /* extra checks for table consistency and statistics */ | |
34 | |
35 static long hash_count_lookup = 0; /* count number of hashtab lookups */ | |
36 static long hash_count_perturb = 0; /* count number of "misses" */ | |
37 #endif | |
38 | |
39 /* Magic value for algorithm that walks through the array. */ | |
40 #define PERTURB_SHIFT 5 | |
41 | |
7803
37c929c4a073
commit https://github.com/vim/vim/commit/92b8b2d307e34117f146319872010b0ccc9d2713
Christian Brabandt <cb@256bit.org>
parents:
3970
diff
changeset
|
42 static int hash_may_resize(hashtab_T *ht, int minitems); |
799 | 43 |
44 #if 0 /* currently not used */ | |
45 /* | |
46 * Create an empty hash table. | |
47 * Returns NULL when out of memory. | |
48 */ | |
49 hashtab_T * | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
50 hash_create(void) |
799 | 51 { |
52 hashtab_T *ht; | |
53 | |
54 ht = (hashtab_T *)alloc(sizeof(hashtab_T)); | |
55 if (ht != NULL) | |
56 hash_init(ht); | |
57 return ht; | |
58 } | |
59 #endif | |
60 | |
61 /* | |
62 * Initialize an empty hash table. | |
63 */ | |
64 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
65 hash_init(hashtab_T *ht) |
799 | 66 { |
67 /* This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray". */ | |
68 vim_memset(ht, 0, sizeof(hashtab_T)); | |
69 ht->ht_array = ht->ht_smallarray; | |
70 ht->ht_mask = HT_INIT_SIZE - 1; | |
71 } | |
72 | |
73 /* | |
74 * Free the array of a hash table. Does not free the items it contains! | |
75 * If "ht" is not freed then you should call hash_init() next! | |
76 */ | |
77 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
78 hash_clear(hashtab_T *ht) |
799 | 79 { |
80 if (ht->ht_array != ht->ht_smallarray) | |
81 vim_free(ht->ht_array); | |
82 } | |
83 | |
84 /* | |
85 * Free the array of a hash table and all the keys it contains. The keys must | |
86 * have been allocated. "off" is the offset from the start of the allocate | |
87 * memory to the location of the key (it's always positive). | |
88 */ | |
89 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
90 hash_clear_all(hashtab_T *ht, int off) |
799 | 91 { |
835 | 92 long todo; |
799 | 93 hashitem_T *hi; |
94 | |
835 | 95 todo = (long)ht->ht_used; |
799 | 96 for (hi = ht->ht_array; todo > 0; ++hi) |
97 { | |
98 if (!HASHITEM_EMPTY(hi)) | |
99 { | |
100 vim_free(hi->hi_key - off); | |
101 --todo; | |
102 } | |
103 } | |
104 hash_clear(ht); | |
105 } | |
106 | |
107 /* | |
108 * Find "key" in hashtable "ht". "key" must not be NULL. | |
109 * Always returns a pointer to a hashitem. If the item was not found then | |
110 * HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key | |
111 * would be added. | |
112 * WARNING: The returned pointer becomes invalid when the hashtable is changed | |
113 * (adding, setting or removing an item)! | |
114 */ | |
115 hashitem_T * | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
116 hash_find(hashtab_T *ht, char_u *key) |
799 | 117 { |
118 return hash_lookup(ht, key, hash_hash(key)); | |
119 } | |
120 | |
121 /* | |
122 * Like hash_find(), but caller computes "hash". | |
123 */ | |
124 hashitem_T * | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
125 hash_lookup(hashtab_T *ht, char_u *key, hash_T hash) |
799 | 126 { |
127 hash_T perturb; | |
128 hashitem_T *freeitem; | |
129 hashitem_T *hi; | |
3970 | 130 unsigned idx; |
799 | 131 |
132 #ifdef HT_DEBUG | |
133 ++hash_count_lookup; | |
134 #endif | |
135 | |
136 /* | |
137 * Quickly handle the most common situations: | |
138 * - return if there is no item at all | |
139 * - skip over a removed item | |
140 * - return if the item matches | |
141 */ | |
3970 | 142 idx = (unsigned)(hash & ht->ht_mask); |
799 | 143 hi = &ht->ht_array[idx]; |
144 | |
145 if (hi->hi_key == NULL) | |
146 return hi; | |
147 if (hi->hi_key == HI_KEY_REMOVED) | |
148 freeitem = hi; | |
149 else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0) | |
150 return hi; | |
151 else | |
152 freeitem = NULL; | |
153 | |
154 /* | |
155 * Need to search through the table to find the key. The algorithm | |
156 * to step through the table starts with large steps, gradually becoming | |
157 * smaller down to (1/4 table size + 1). This means it goes through all | |
158 * table entries in the end. | |
159 * When we run into a NULL key it's clear that the key isn't there. | |
160 * Return the first available slot found (can be a slot of a removed | |
161 * item). | |
162 */ | |
163 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) | |
164 { | |
165 #ifdef HT_DEBUG | |
166 ++hash_count_perturb; /* count a "miss" for hashtab lookup */ | |
167 #endif | |
3970 | 168 idx = (unsigned)((idx << 2U) + idx + perturb + 1U); |
799 | 169 hi = &ht->ht_array[idx & ht->ht_mask]; |
170 if (hi->hi_key == NULL) | |
171 return freeitem == NULL ? hi : freeitem; | |
172 if (hi->hi_hash == hash | |
173 && hi->hi_key != HI_KEY_REMOVED | |
174 && STRCMP(hi->hi_key, key) == 0) | |
175 return hi; | |
176 if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL) | |
177 freeitem = hi; | |
178 } | |
179 } | |
180 | |
181 /* | |
182 * Print the efficiency of hashtable lookups. | |
183 * Useful when trying different hash algorithms. | |
184 * Called when exiting. | |
185 */ | |
186 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
187 hash_debug_results(void) |
799 | 188 { |
189 #ifdef HT_DEBUG | |
190 fprintf(stderr, "\r\n\r\n\r\n\r\n"); | |
191 fprintf(stderr, "Number of hashtable lookups: %ld\r\n", hash_count_lookup); | |
192 fprintf(stderr, "Number of perturb loops: %ld\r\n", hash_count_perturb); | |
193 fprintf(stderr, "Percentage of perturb loops: %ld%%\r\n", | |
194 hash_count_perturb * 100 / hash_count_lookup); | |
195 #endif | |
196 } | |
197 | |
198 /* | |
199 * Add item with key "key" to hashtable "ht". | |
200 * Returns FAIL when out of memory or the key is already present. | |
201 */ | |
202 int | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
203 hash_add(hashtab_T *ht, char_u *key) |
799 | 204 { |
205 hash_T hash = hash_hash(key); | |
206 hashitem_T *hi; | |
207 | |
208 hi = hash_lookup(ht, key, hash); | |
209 if (!HASHITEM_EMPTY(hi)) | |
210 { | |
10359
66f1b5bf3fa6
commit https://github.com/vim/vim/commit/95f096030ed1a8afea028f2ea295d6f6a70f466f
Christian Brabandt <cb@256bit.org>
parents:
10042
diff
changeset
|
211 internal_error("hash_add()"); |
799 | 212 return FAIL; |
213 } | |
214 return hash_add_item(ht, hi, key, hash); | |
215 } | |
216 | |
217 /* | |
218 * Add item "hi" with "key" to hashtable "ht". "key" must not be NULL and | |
219 * "hi" must have been obtained with hash_lookup() and point to an empty item. | |
220 * "hi" is invalid after this! | |
221 * Returns OK or FAIL (out of memory). | |
222 */ | |
223 int | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
224 hash_add_item( |
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
225 hashtab_T *ht, |
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
226 hashitem_T *hi, |
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
227 char_u *key, |
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
228 hash_T hash) |
799 | 229 { |
230 /* If resizing failed before and it fails again we can't add an item. */ | |
231 if (ht->ht_error && hash_may_resize(ht, 0) == FAIL) | |
232 return FAIL; | |
233 | |
234 ++ht->ht_used; | |
235 if (hi->hi_key == NULL) | |
236 ++ht->ht_filled; | |
237 hi->hi_key = key; | |
238 hi->hi_hash = hash; | |
239 | |
240 /* When the space gets low may resize the array. */ | |
241 return hash_may_resize(ht, 0); | |
242 } | |
243 | |
244 #if 0 /* not used */ | |
245 /* | |
246 * Overwrite hashtable item "hi" with "key". "hi" must point to the item that | |
247 * is to be overwritten. Thus the number of items in the hashtable doesn't | |
248 * change. | |
249 * Although the key must be identical, the pointer may be different, thus it's | |
250 * set anyway (the key is part of an item with that key). | |
251 * The caller must take care of freeing the old item. | |
252 * "hi" is invalid after this! | |
253 */ | |
254 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
255 hash_set(hashitem_T *hi, char_u *key) |
799 | 256 { |
257 hi->hi_key = key; | |
258 } | |
259 #endif | |
260 | |
261 /* | |
262 * Remove item "hi" from hashtable "ht". "hi" must have been obtained with | |
263 * hash_lookup(). | |
264 * The caller must take care of freeing the item itself. | |
265 */ | |
266 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
267 hash_remove(hashtab_T *ht, hashitem_T *hi) |
799 | 268 { |
269 --ht->ht_used; | |
270 hi->hi_key = HI_KEY_REMOVED; | |
271 hash_may_resize(ht, 0); | |
272 } | |
273 | |
274 /* | |
275 * Lock a hashtable: prevent that ht_array changes. | |
276 * Don't use this when items are to be added! | |
277 * Must call hash_unlock() later. | |
278 */ | |
279 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
280 hash_lock(hashtab_T *ht) |
799 | 281 { |
282 ++ht->ht_locked; | |
283 } | |
284 | |
285 #if 0 /* currently not used */ | |
286 /* | |
287 * Lock a hashtable at the specified number of entries. | |
288 * Caller must make sure no more than "size" entries will be added. | |
289 * Must call hash_unlock() later. | |
290 */ | |
291 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
292 hash_lock_size(hashtab_T *ht, int size) |
799 | 293 { |
294 (void)hash_may_resize(ht, size); | |
295 ++ht->ht_locked; | |
296 } | |
297 #endif | |
298 | |
299 /* | |
300 * Unlock a hashtable: allow ht_array changes again. | |
301 * Table will be resized (shrink) when necessary. | |
302 * This must balance a call to hash_lock(). | |
303 */ | |
304 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
305 hash_unlock(hashtab_T *ht) |
799 | 306 { |
307 --ht->ht_locked; | |
308 (void)hash_may_resize(ht, 0); | |
309 } | |
310 | |
311 /* | |
312 * Shrink a hashtable when there is too much empty space. | |
313 * Grow a hashtable when there is not enough empty space. | |
314 * Returns OK or FAIL (out of memory). | |
315 */ | |
316 static int | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
317 hash_may_resize( |
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
318 hashtab_T *ht, |
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
319 int minitems) /* minimal number of items */ |
799 | 320 { |
321 hashitem_T temparray[HT_INIT_SIZE]; | |
322 hashitem_T *oldarray, *newarray; | |
323 hashitem_T *olditem, *newitem; | |
3970 | 324 unsigned newi; |
799 | 325 int todo; |
326 long_u oldsize, newsize; | |
327 long_u minsize; | |
328 long_u newmask; | |
329 hash_T perturb; | |
330 | |
331 /* Don't resize a locked table. */ | |
332 if (ht->ht_locked > 0) | |
333 return OK; | |
334 | |
335 #ifdef HT_DEBUG | |
336 if (ht->ht_used > ht->ht_filled) | |
337 EMSG("hash_may_resize(): more used than filled"); | |
338 if (ht->ht_filled >= ht->ht_mask + 1) | |
339 EMSG("hash_may_resize(): table completely filled"); | |
340 #endif | |
341 | |
342 if (minitems == 0) | |
343 { | |
344 /* Return quickly for small tables with at least two NULL items. NULL | |
345 * items are required for the lookup to decide a key isn't there. */ | |
346 if (ht->ht_filled < HT_INIT_SIZE - 1 | |
347 && ht->ht_array == ht->ht_smallarray) | |
348 return OK; | |
349 | |
350 /* | |
351 * Grow or refill the array when it's more than 2/3 full (including | |
352 * removed items, so that they get cleaned up). | |
353 * Shrink the array when it's less than 1/5 full. When growing it is | |
354 * at least 1/4 full (avoids repeated grow-shrink operations) | |
355 */ | |
356 oldsize = ht->ht_mask + 1; | |
357 if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5) | |
358 return OK; | |
359 | |
360 if (ht->ht_used > 1000) | |
361 minsize = ht->ht_used * 2; /* it's big, don't make too much room */ | |
362 else | |
363 minsize = ht->ht_used * 4; /* make plenty of room */ | |
364 } | |
365 else | |
366 { | |
367 /* Use specified size. */ | |
368 if ((long_u)minitems < ht->ht_used) /* just in case... */ | |
835 | 369 minitems = (int)ht->ht_used; |
799 | 370 minsize = minitems * 3 / 2; /* array is up to 2/3 full */ |
371 } | |
372 | |
373 newsize = HT_INIT_SIZE; | |
374 while (newsize < minsize) | |
375 { | |
376 newsize <<= 1; /* make sure it's always a power of 2 */ | |
377 if (newsize == 0) | |
378 return FAIL; /* overflow */ | |
379 } | |
380 | |
381 if (newsize == HT_INIT_SIZE) | |
382 { | |
383 /* Use the small array inside the hashdict structure. */ | |
384 newarray = ht->ht_smallarray; | |
385 if (ht->ht_array == newarray) | |
386 { | |
387 /* Moving from ht_smallarray to ht_smallarray! Happens when there | |
388 * are many removed items. Copy the items to be able to clean up | |
389 * removed items. */ | |
390 mch_memmove(temparray, newarray, sizeof(temparray)); | |
391 oldarray = temparray; | |
392 } | |
393 else | |
394 oldarray = ht->ht_array; | |
395 } | |
396 else | |
397 { | |
398 /* Allocate an array. */ | |
399 newarray = (hashitem_T *)alloc((unsigned) | |
400 (sizeof(hashitem_T) * newsize)); | |
401 if (newarray == NULL) | |
402 { | |
403 /* Out of memory. When there are NULL items still return OK. | |
404 * Otherwise set ht_error, because lookup may result in a hang if | |
405 * we add another item. */ | |
406 if (ht->ht_filled < ht->ht_mask) | |
407 return OK; | |
408 ht->ht_error = TRUE; | |
409 return FAIL; | |
410 } | |
411 oldarray = ht->ht_array; | |
412 } | |
413 vim_memset(newarray, 0, (size_t)(sizeof(hashitem_T) * newsize)); | |
414 | |
415 /* | |
416 * Move all the items from the old array to the new one, placing them in | |
417 * the right spot. The new array won't have any removed items, thus this | |
418 * is also a cleanup action. | |
419 */ | |
420 newmask = newsize - 1; | |
835 | 421 todo = (int)ht->ht_used; |
799 | 422 for (olditem = oldarray; todo > 0; ++olditem) |
423 if (!HASHITEM_EMPTY(olditem)) | |
424 { | |
425 /* | |
426 * The algorithm to find the spot to add the item is identical to | |
427 * the algorithm to find an item in hash_lookup(). But we only | |
428 * need to search for a NULL key, thus it's simpler. | |
429 */ | |
3970 | 430 newi = (unsigned)(olditem->hi_hash & newmask); |
799 | 431 newitem = &newarray[newi]; |
432 | |
433 if (newitem->hi_key != NULL) | |
434 for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT) | |
435 { | |
3970 | 436 newi = (unsigned)((newi << 2U) + newi + perturb + 1U); |
799 | 437 newitem = &newarray[newi & newmask]; |
438 if (newitem->hi_key == NULL) | |
439 break; | |
440 } | |
441 *newitem = *olditem; | |
442 --todo; | |
443 } | |
444 | |
445 if (ht->ht_array != ht->ht_smallarray) | |
446 vim_free(ht->ht_array); | |
447 ht->ht_array = newarray; | |
448 ht->ht_mask = newmask; | |
449 ht->ht_filled = ht->ht_used; | |
450 ht->ht_error = FALSE; | |
451 | |
452 return OK; | |
453 } | |
454 | |
455 /* | |
456 * Get the hash number for a key. | |
457 * If you think you know a better hash function: Compile with HT_DEBUG set and | |
458 * run a script that uses hashtables a lot. Vim will then print statistics | |
459 * when exiting. Try that with the current hash algorithm and yours. The | |
460 * lower the percentage the better. | |
461 */ | |
462 hash_T | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
463 hash_hash(char_u *key) |
799 | 464 { |
465 hash_T hash; | |
466 char_u *p; | |
467 | |
468 if ((hash = *key) == 0) | |
8839
9fa567d13551
commit https://github.com/vim/vim/commit/0921ecff1c5a74541bad6c073e8ade32247403d8
Christian Brabandt <cb@256bit.org>
parents:
7823
diff
changeset
|
469 return (hash_T)0; |
799 | 470 p = key + 1; |
471 | |
472 /* A simplistic algorithm that appears to do very well. | |
473 * Suggested by George Reilly. */ | |
474 while (*p != NUL) | |
475 hash = hash * 101 + *p++; | |
476 | |
477 return hash; | |
478 } |