120
|
1 /* vi:set ts=8 sts=4 sw=4:
|
|
2 *
|
|
3 * VIM - Vi IMproved by Bram Moolenaar
|
|
4 *
|
|
5 * Do ":help uganda" in Vim to read copying and usage conditions.
|
|
6 * Do ":help credits" in Vim to see a list of people who contributed.
|
|
7 * See README.txt for an overview of the Vim source code.
|
|
8 */
|
|
9
|
|
10 /*
|
|
11 * hashtable.c: Handling of a hashtable with Vim-specific properties.
|
|
12 *
|
|
13 * Each item in a hashtable has a NUL terminated string key. A key can appear
|
|
14 * only once in the table.
|
|
15 *
|
|
16 * A hash number is computed from the key for quick lookup. When the hashes
|
|
17 * of two different keys point to the same entry an algorithm is used to
|
|
18 * iterate over other entries in the table until the right one is found.
|
|
19 * To make the iteration work removed keys are different from entries where a
|
|
20 * key was never present.
|
|
21 *
|
|
22 * The mechanism has been partly based on how Python Dictionaries are
|
|
23 * implemented. The algorithm is from Knuth Vol. 3, Sec. 6.4.
|
|
24 *
|
|
25 * The hashtable grows to accommodate more entries when needed. At least 1/3
|
|
26 * of the entries is empty to keep the lookup efficient (at the cost of extra
|
|
27 * memory).
|
|
28 */
|
|
29
|
|
30 #include "vim.h"
|
|
31
|
|
32 #if defined(FEAT_EVAL) || defined(FEAT_SYN_HL) || defined(PROTO)
|
|
33
|
154
|
34 #if 0
|
242
|
35 # define HT_DEBUG /* extra checks for table consistency and statistics */
|
|
36
|
|
37 static long hash_count_lookup = 0; /* count number of hashtab lookups */
|
|
38 static long hash_count_perturb = 0; /* count number of "misses" */
|
120
|
39 #endif
|
|
40
|
|
41 /* Magic value for algorithm that walks through the array. */
|
|
42 #define PERTURB_SHIFT 5
|
|
43
|
237
|
44 static int hash_may_resize __ARGS((hashtab_T *ht, int minitems));
|
120
|
45
|
242
|
46 #if 0 /* currently not used */
|
120
|
47 /*
|
|
48 * Create an empty hash table.
|
|
49 * Returns NULL when out of memory.
|
|
50 */
|
135
|
51 hashtab_T *
|
120
|
52 hash_create()
|
|
53 {
|
135
|
54 hashtab_T *ht;
|
120
|
55
|
135
|
56 ht = (hashtab_T *)alloc(sizeof(hashtab_T));
|
120
|
57 if (ht != NULL)
|
|
58 hash_init(ht);
|
|
59 return ht;
|
|
60 }
|
|
61 #endif
|
|
62
|
|
63 /*
|
|
64 * Initialize an empty hash table.
|
|
65 */
|
|
66 void
|
|
67 hash_init(ht)
|
135
|
68 hashtab_T *ht;
|
120
|
69 {
|
|
70 /* This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray". */
|
135
|
71 vim_memset(ht, 0, sizeof(hashtab_T));
|
120
|
72 ht->ht_array = ht->ht_smallarray;
|
|
73 ht->ht_mask = HT_INIT_SIZE - 1;
|
|
74 }
|
|
75
|
|
76 /*
|
123
|
77 * Free the array of a hash table. Does not free the items it contains!
|
|
78 * If "ht" is not freed then you should call hash_init() next!
|
120
|
79 */
|
|
80 void
|
123
|
81 hash_clear(ht)
|
135
|
82 hashtab_T *ht;
|
120
|
83 {
|
|
84 if (ht->ht_array != ht->ht_smallarray)
|
|
85 vim_free(ht->ht_array);
|
|
86 }
|
|
87
|
|
88 /*
|
|
89 * Find "key" in hashtable "ht". "key" must not be NULL.
|
|
90 * Always returns a pointer to a hashitem. If the item was not found then
|
|
91 * HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key
|
|
92 * would be added.
|
|
93 * WARNING: The returned pointer becomes invalid when the hashtable is changed
|
|
94 * (adding, setting or removing an item)!
|
|
95 */
|
135
|
96 hashitem_T *
|
120
|
97 hash_find(ht, key)
|
135
|
98 hashtab_T *ht;
|
120
|
99 char_u *key;
|
|
100 {
|
|
101 return hash_lookup(ht, key, hash_hash(key));
|
|
102 }
|
|
103
|
|
104 /*
|
|
105 * Like hash_find(), but caller computes "hash".
|
|
106 */
|
135
|
107 hashitem_T *
|
120
|
108 hash_lookup(ht, key, hash)
|
135
|
109 hashtab_T *ht;
|
120
|
110 char_u *key;
|
135
|
111 hash_T hash;
|
120
|
112 {
|
135
|
113 hash_T perturb;
|
|
114 hashitem_T *freeitem;
|
|
115 hashitem_T *hi;
|
120
|
116 int idx;
|
|
117
|
242
|
118 #ifdef HT_DEBUG
|
|
119 ++hash_count_lookup;
|
|
120 #endif
|
|
121
|
120
|
122 /*
|
|
123 * Quickly handle the most common situations:
|
|
124 * - return if there is no item at all
|
|
125 * - skip over a removed item
|
|
126 * - return if the item matches
|
|
127 */
|
|
128 idx = hash & ht->ht_mask;
|
|
129 hi = &ht->ht_array[idx];
|
|
130
|
|
131 if (hi->hi_key == NULL)
|
|
132 return hi;
|
|
133 if (hi->hi_key == HI_KEY_REMOVED)
|
|
134 freeitem = hi;
|
|
135 else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0)
|
|
136 return hi;
|
|
137 else
|
|
138 freeitem = NULL;
|
|
139
|
|
140 /*
|
|
141 * Need to search through the table to find the key. The algorithm
|
|
142 * to step through the table starts with large steps, gradually becoming
|
|
143 * smaller down to (1/4 table size + 1). This means it goes through all
|
|
144 * table entries in the end.
|
|
145 * When we run into a NULL key it's clear that the key isn't there.
|
|
146 * Return the first available slot found (can be a slot of a removed
|
|
147 * item).
|
|
148 */
|
|
149 for (perturb = hash; ; perturb >>= PERTURB_SHIFT)
|
|
150 {
|
242
|
151 #ifdef HT_DEBUG
|
|
152 ++hash_count_perturb; /* count a "miss" for hashtab lookup */
|
|
153 #endif
|
120
|
154 idx = (idx << 2) + idx + perturb + 1;
|
|
155 hi = &ht->ht_array[idx & ht->ht_mask];
|
|
156 if (hi->hi_key == NULL)
|
|
157 return freeitem == NULL ? hi : freeitem;
|
|
158 if (hi->hi_hash == hash
|
|
159 && hi->hi_key != HI_KEY_REMOVED
|
|
160 && STRCMP(hi->hi_key, key) == 0)
|
|
161 return hi;
|
|
162 if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL)
|
|
163 freeitem = hi;
|
|
164 }
|
|
165 }
|
|
166
|
|
/*
 * Print the efficiency of hashtable lookups to stderr.
 * Useful when trying different hash algorithms.
 * Called when exiting.
 * Does nothing unless HT_DEBUG is defined.
 */
    void
hash_debug_results()
{
#ifdef HT_DEBUG
    fprintf(stderr, "\r\n\r\n\r\n\r\n");
    fprintf(stderr, "Number of hashtable lookups: %ld\r\n", hash_count_lookup);
    fprintf(stderr, "Number of perturb loops: %ld\r\n", hash_count_perturb);
    /* Avoid dividing by zero when no lookup was done at all. */
    fprintf(stderr, "Percentage of perturb loops: %ld%%\r\n",
	    hash_count_lookup == 0 ? 0L
			: hash_count_perturb * 100 / hash_count_lookup);
#endif
}
|
|
183
|
|
184 /*
|
120
|
185 * Add item with key "key" to hashtable "ht".
|
|
186 * Returns FAIL when out of memory or the key is already present.
|
|
187 */
|
|
188 int
|
|
189 hash_add(ht, key)
|
135
|
190 hashtab_T *ht;
|
120
|
191 char_u *key;
|
|
192 {
|
135
|
193 hash_T hash = hash_hash(key);
|
|
194 hashitem_T *hi;
|
120
|
195
|
|
196 hi = hash_lookup(ht, key, hash);
|
|
197 if (!HASHITEM_EMPTY(hi))
|
|
198 {
|
|
199 EMSG2(_(e_intern2), "hash_add()");
|
|
200 return FAIL;
|
|
201 }
|
|
202 return hash_add_item(ht, hi, key, hash);
|
|
203 }
|
|
204
|
|
205 /*
|
|
206 * Add item "hi" with "key" to hashtable "ht". "key" must not be NULL and
|
|
207 * "hi" must have been obtained with hash_lookup() and point to an empty item.
|
|
208 * "hi" is invalid after this!
|
|
209 * Returns OK or FAIL (out of memory).
|
|
210 */
|
135
|
211 int
|
120
|
212 hash_add_item(ht, hi, key, hash)
|
135
|
213 hashtab_T *ht;
|
|
214 hashitem_T *hi;
|
120
|
215 char_u *key;
|
135
|
216 hash_T hash;
|
120
|
217 {
|
|
218 /* If resizing failed before and it fails again we can't add an item. */
|
237
|
219 if (ht->ht_error && hash_may_resize(ht, 0) == FAIL)
|
120
|
220 return FAIL;
|
|
221
|
|
222 ++ht->ht_used;
|
|
223 if (hi->hi_key == NULL)
|
|
224 ++ht->ht_filled;
|
|
225 hi->hi_key = key;
|
|
226 hi->hi_hash = hash;
|
|
227
|
|
228 /* When the space gets low may resize the array. */
|
237
|
229 return hash_may_resize(ht, 0);
|
120
|
230 }
|
|
231
|
|
232 #if 0 /* not used */
|
|
233 /*
|
|
234 * Overwrite hashtable item "hi" with "key". "hi" must point to the item that
|
|
235 * is to be overwritten. Thus the number of items in the hashtable doesn't
|
|
236 * change.
|
|
237 * Although the key must be identical, the pointer may be different, thus it's
|
|
238 * set anyway (the key is part of an item with that key).
|
|
239 * The caller must take care of freeing the old item.
|
|
240 * "hi" is invalid after this!
|
|
241 */
|
|
242 void
|
|
243 hash_set(hi, key)
|
135
|
244 hashitem_T *hi;
|
120
|
245 char_u *key;
|
|
246 {
|
|
247 hi->hi_key = key;
|
|
248 }
|
|
249 #endif
|
|
250
|
|
251 /*
|
|
252 * Remove item "hi" from hashtable "ht". "hi" must have been obtained with
|
123
|
253 * hash_lookup() and point to an empty item.
|
|
254 * The caller must take care of freeing the item itself.
|
120
|
255 */
|
|
256 void
|
|
257 hash_remove(ht, hi)
|
135
|
258 hashtab_T *ht;
|
|
259 hashitem_T *hi;
|
120
|
260 {
|
|
261 --ht->ht_used;
|
|
262 hi->hi_key = HI_KEY_REMOVED;
|
237
|
263 hash_may_resize(ht, 0);
|
120
|
264 }
|
|
265
|
|
266 /*
|
123
|
267 * Lock a hashtable: prevent that ht_array changes.
|
|
268 * Don't use this when items are to be added!
|
|
269 * Must call hash_unlock() later.
|
|
270 */
|
|
271 void
|
|
272 hash_lock(ht)
|
135
|
273 hashtab_T *ht;
|
123
|
274 {
|
|
275 ++ht->ht_locked;
|
|
276 }
|
|
277
|
301
|
278 #if 0 /* currently not used */
|
123
|
279 /*
|
237
|
280 * Lock a hashtable at the specified number of entries.
|
|
281 * Caller must make sure no more than "size" entries will be added.
|
|
282 * Must call hash_unlock() later.
|
|
283 */
|
|
284 void
|
|
285 hash_lock_size(ht, size)
|
|
286 hashtab_T *ht;
|
|
287 int size;
|
|
288 {
|
|
289 (void)hash_may_resize(ht, size);
|
|
290 ++ht->ht_locked;
|
|
291 }
|
301
|
292 #endif
|
237
|
293
|
|
294 /*
|
123
|
295 * Unlock a hashtable: allow ht_array changes again.
|
|
296 * Table will be resized (shrink) when necessary.
|
|
297 * This must balance a call to hash_lock().
|
|
298 */
|
|
299 void
|
|
300 hash_unlock(ht)
|
135
|
301 hashtab_T *ht;
|
123
|
302 {
|
|
303 --ht->ht_locked;
|
237
|
304 (void)hash_may_resize(ht, 0);
|
123
|
305 }
|
|
306
|
|
307 /*
|
120
|
308 * Shrink a hashtable when there is too much empty space.
|
|
309 * Grow a hashtable when there is not enough empty space.
|
|
310 * Returns OK or FAIL (out of memory).
|
|
311 */
|
|
312 static int
|
237
|
313 hash_may_resize(ht, minitems)
|
135
|
314 hashtab_T *ht;
|
237
|
315 int minitems; /* minimal number of items */
|
120
|
316 {
|
135
|
317 hashitem_T temparray[HT_INIT_SIZE];
|
|
318 hashitem_T *oldarray, *newarray;
|
|
319 hashitem_T *olditem, *newitem;
|
120
|
320 int newi;
|
|
321 int todo;
|
|
322 long_u oldsize, newsize;
|
|
323 long_u minsize;
|
|
324 long_u newmask;
|
135
|
325 hash_T perturb;
|
120
|
326
|
123
|
327 /* Don't resize a locked table. */
|
|
328 if (ht->ht_locked > 0)
|
|
329 return OK;
|
|
330
|
120
|
331 #ifdef HT_DEBUG
|
|
332 if (ht->ht_used > ht->ht_filled)
|
|
333 EMSG("hash_may_resize(): more used than filled");
|
|
334 if (ht->ht_filled >= ht->ht_mask + 1)
|
|
335 EMSG("hash_may_resize(): table completely filled");
|
|
336 #endif
|
|
337
|
237
|
338 if (minitems == 0)
|
|
339 {
|
|
340 /* Return quickly for small tables with at least two NULL items. NULL
|
|
341 * items are required for the lookup to decide a key isn't there. */
|
|
342 if (ht->ht_filled < HT_INIT_SIZE - 1
|
|
343 && ht->ht_array == ht->ht_smallarray)
|
|
344 return OK;
|
120
|
345
|
237
|
346 /*
|
|
347 * Grow or refill the array when it's more than 2/3 full (including
|
|
348 * removed items, so that they get cleaned up).
|
|
349 * Shrink the array when it's less than 1/5 full. When growing it is
|
|
350 * at least 1/4 full (avoids repeated grow-shrink operations)
|
|
351 */
|
|
352 oldsize = ht->ht_mask + 1;
|
|
353 if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5)
|
|
354 return OK;
|
120
|
355
|
237
|
356 if (ht->ht_used > 1000)
|
|
357 minsize = ht->ht_used * 2; /* it's big, don't make too much room */
|
|
358 else
|
|
359 minsize = ht->ht_used * 4; /* make plenty of room */
|
|
360 }
|
120
|
361 else
|
237
|
362 {
|
|
363 /* Use specified size. */
|
242
|
364 if ((long_u)minitems < ht->ht_used) /* just in case... */
|
237
|
365 minitems = ht->ht_used;
|
|
366 minsize = minitems * 3 / 2; /* array is up to 2/3 full */
|
|
367 }
|
|
368
|
120
|
369 newsize = HT_INIT_SIZE;
|
|
370 while (newsize < minsize)
|
|
371 {
|
|
372 newsize <<= 1; /* make sure it's always a power of 2 */
|
|
373 if (newsize == 0)
|
|
374 return FAIL; /* overflow */
|
|
375 }
|
|
376
|
|
377 if (newsize == HT_INIT_SIZE)
|
|
378 {
|
|
379 /* Use the small array inside the hashdict structure. */
|
|
380 newarray = ht->ht_smallarray;
|
|
381 if (ht->ht_array == newarray)
|
|
382 {
|
|
383 /* Moving from ht_smallarray to ht_smallarray! Happens when there
|
|
384 * are many removed items. Copy the items to be able to clean up
|
|
385 * removed items. */
|
|
386 mch_memmove(temparray, newarray, sizeof(temparray));
|
|
387 oldarray = temparray;
|
|
388 }
|
|
389 else
|
|
390 oldarray = ht->ht_array;
|
|
391 }
|
|
392 else
|
|
393 {
|
|
394 /* Allocate an array. */
|
140
|
395 newarray = (hashitem_T *)alloc((unsigned)
|
|
396 (sizeof(hashitem_T) * newsize));
|
120
|
397 if (newarray == NULL)
|
|
398 {
|
|
399 /* Out of memory. When there are NULL items still return OK.
|
|
400 * Otherwise set ht_error, because lookup may result in a hang if
|
|
401 * we add another item. */
|
|
402 if (ht->ht_filled < ht->ht_mask)
|
|
403 return OK;
|
|
404 ht->ht_error = TRUE;
|
|
405 return FAIL;
|
|
406 }
|
|
407 oldarray = ht->ht_array;
|
|
408 }
|
135
|
409 vim_memset(newarray, 0, (size_t)(sizeof(hashitem_T) * newsize));
|
120
|
410
|
|
411 /*
|
|
412 * Move all the items from the old array to the new one, placing them in
|
|
413 * the right spot. The new array won't have any removed items, thus this
|
|
414 * is also a cleanup action.
|
|
415 */
|
|
416 newmask = newsize - 1;
|
|
417 todo = ht->ht_used;
|
|
418 for (olditem = oldarray; todo > 0; ++olditem)
|
154
|
419 if (!HASHITEM_EMPTY(olditem))
|
120
|
420 {
|
|
421 /*
|
|
422 * The algorithm to find the spot to add the item is identical to
|
|
423 * the algorithm to find an item in hash_lookup(). But we only
|
|
424 * need to search for a NULL key, thus it's simpler.
|
|
425 */
|
|
426 newi = olditem->hi_hash & newmask;
|
|
427 newitem = &newarray[newi];
|
|
428
|
|
429 if (newitem->hi_key != NULL)
|
|
430 for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT)
|
|
431 {
|
|
432 newi = (newi << 2) + newi + perturb + 1;
|
|
433 newitem = &newarray[newi & newmask];
|
|
434 if (newitem->hi_key == NULL)
|
|
435 break;
|
|
436 }
|
|
437 *newitem = *olditem;
|
|
438 --todo;
|
|
439 }
|
|
440
|
|
441 if (ht->ht_array != ht->ht_smallarray)
|
|
442 vim_free(ht->ht_array);
|
|
443 ht->ht_array = newarray;
|
|
444 ht->ht_mask = newmask;
|
|
445 ht->ht_filled = ht->ht_used;
|
|
446 ht->ht_error = FALSE;
|
|
447
|
|
448 return OK;
|
|
449 }
|
|
450
|
|
451 /*
|
242
|
452 * Get the hash number for a key.
|
|
453 * If you think you know a better hash function: Compile with HT_DEBUG set and
|
|
454 * run a script that uses hashtables a lot. Vim will then print statistics
|
|
455 * when exiting. Try that with the current hash algorithm and yours. The
|
|
456 * lower the percentage the better.
|
120
|
457 */
|
135
|
458 hash_T
|
120
|
459 hash_hash(key)
|
|
460 char_u *key;
|
|
461 {
|
242
|
462 hash_T hash;
|
|
463 char_u *p;
|
|
464
|
|
465 if ((hash = *key) == 0)
|
|
466 return (hash_T)0; /* Empty keys are not allowed, but we don't
|
|
467 want to crash if we get one. */
|
|
468 p = key + 1;
|
|
469
|
|
470 #if 0
|
|
471 /* ElfHash algorithm, which is supposed to have an even distribution.
|
|
472 * Suggested by Charles Campbell. */
|
135
|
473 hash_T g;
|
120
|
474
|
|
475 while (*p != NUL)
|
|
476 {
|
|
477 hash = (hash << 4) + *p++; /* clear low 4 bits of hash, add char */
|
|
478 g = hash & 0xf0000000L; /* g has high 4 bits of hash only */
|
|
479 if (g != 0)
|
|
480 hash ^= g >> 24; /* xor g's high 4 bits into hash */
|
|
481 }
|
242
|
482 #else
|
|
483
|
|
484 /* A simplistic algorithm that appears to do very well.
|
|
485 * Suggested by George Reilly. */
|
|
486 while (*p != NUL)
|
|
487 hash = hash * 101 + *p++;
|
|
488 #endif
|
120
|
489
|
|
490 return hash;
|
|
491 }
|
|
492
|
|
493 #endif
|