799
|
1 /* vi:set ts=8 sts=4 sw=4:
|
|
2 *
|
|
3 * VIM - Vi IMproved by Bram Moolenaar
|
|
4 *
|
|
5 * Do ":help uganda" in Vim to read copying and usage conditions.
|
|
6 * Do ":help credits" in Vim to see a list of people who contributed.
|
|
7 * See README.txt for an overview of the Vim source code.
|
|
8 */
|
|
9
|
|
10 /*
|
|
11 * hashtab.c: Handling of a hashtable with Vim-specific properties.
|
|
12 *
|
|
13 * Each item in a hashtable has a NUL terminated string key. A key can appear
|
|
14 * only once in the table.
|
|
15 *
|
|
16 * A hash number is computed from the key for quick lookup. When the hashes
|
|
17 * of two different keys point to the same entry an algorithm is used to
|
|
18 * iterate over other entries in the table until the right one is found.
|
|
19 * To make the iteration work removed keys are different from entries where a
|
|
20 * key was never present.
|
|
21 *
|
|
22 * The mechanism has been partly based on how Python Dictionaries are
|
|
23 * implemented. The algorithm is from Knuth Vol. 3, Sec. 6.4.
|
|
24 *
|
|
25 * The hashtable grows to accommodate more entries when needed. At least 1/3
|
|
26 * of the entries is empty to keep the lookup efficient (at the cost of extra
|
|
27 * memory).
|
|
28 */
|
|
29
|
|
30 #include "vim.h"
|
|
31
|
|
32 #if defined(FEAT_EVAL) || defined(FEAT_SYN_HL) || defined(PROTO)
|
|
33
|
|
34 #if 0
|
|
35 # define HT_DEBUG /* extra checks for table consistency and statistics */
|
|
36
|
|
37 static long hash_count_lookup = 0; /* count number of hashtab lookups */
|
|
38 static long hash_count_perturb = 0; /* count number of "misses" */
|
|
39 #endif
|
|
40
|
|
41 /* Magic value for algorithm that walks through the array. */
|
|
42 #define PERTURB_SHIFT 5
|
|
43
|
|
44 static int hash_may_resize __ARGS((hashtab_T *ht, int minitems));
|
|
45
|
|
46 #if 0 /* currently not used */
|
|
47 /*
|
|
48 * Create an empty hash table.
|
|
49 * Returns NULL when out of memory.
|
|
50 */
|
|
51 hashtab_T *
|
|
52 hash_create()
|
|
53 {
|
|
54 hashtab_T *ht;
|
|
55
|
|
56 ht = (hashtab_T *)alloc(sizeof(hashtab_T));
|
|
57 if (ht != NULL)
|
|
58 hash_init(ht);
|
|
59 return ht;
|
|
60 }
|
|
61 #endif
|
|
62
|
|
63 /*
|
|
64 * Initialize an empty hash table.
|
|
65 */
|
|
66 void
|
|
67 hash_init(ht)
|
|
68 hashtab_T *ht;
|
|
69 {
|
|
70 /* This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray". */
|
|
71 vim_memset(ht, 0, sizeof(hashtab_T));
|
|
72 ht->ht_array = ht->ht_smallarray;
|
|
73 ht->ht_mask = HT_INIT_SIZE - 1;
|
|
74 }
|
|
75
|
|
76 /*
|
|
77 * Free the array of a hash table. Does not free the items it contains!
|
|
78 * If "ht" is not freed then you should call hash_init() next!
|
|
79 */
|
|
80 void
|
|
81 hash_clear(ht)
|
|
82 hashtab_T *ht;
|
|
83 {
|
|
84 if (ht->ht_array != ht->ht_smallarray)
|
|
85 vim_free(ht->ht_array);
|
|
86 }
|
|
87
|
|
88 /*
|
|
89 * Free the array of a hash table and all the keys it contains. The keys must
|
|
90 * have been allocated. "off" is the offset from the start of the allocate
|
|
91 * memory to the location of the key (it's always positive).
|
|
92 */
|
|
93 void
|
|
94 hash_clear_all(ht, off)
|
|
95 hashtab_T *ht;
|
|
96 int off;
|
|
97 {
|
835
|
98 long todo;
|
799
|
99 hashitem_T *hi;
|
|
100
|
835
|
101 todo = (long)ht->ht_used;
|
799
|
102 for (hi = ht->ht_array; todo > 0; ++hi)
|
|
103 {
|
|
104 if (!HASHITEM_EMPTY(hi))
|
|
105 {
|
|
106 vim_free(hi->hi_key - off);
|
|
107 --todo;
|
|
108 }
|
|
109 }
|
|
110 hash_clear(ht);
|
|
111 }
|
|
112
|
|
113 /*
|
|
114 * Find "key" in hashtable "ht". "key" must not be NULL.
|
|
115 * Always returns a pointer to a hashitem. If the item was not found then
|
|
116 * HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key
|
|
117 * would be added.
|
|
118 * WARNING: The returned pointer becomes invalid when the hashtable is changed
|
|
119 * (adding, setting or removing an item)!
|
|
120 */
|
|
121 hashitem_T *
|
|
122 hash_find(ht, key)
|
|
123 hashtab_T *ht;
|
|
124 char_u *key;
|
|
125 {
|
|
126 return hash_lookup(ht, key, hash_hash(key));
|
|
127 }
|
|
128
|
|
129 /*
|
|
130 * Like hash_find(), but caller computes "hash".
|
|
131 */
|
|
132 hashitem_T *
|
|
133 hash_lookup(ht, key, hash)
|
|
134 hashtab_T *ht;
|
|
135 char_u *key;
|
|
136 hash_T hash;
|
|
137 {
|
|
138 hash_T perturb;
|
|
139 hashitem_T *freeitem;
|
|
140 hashitem_T *hi;
|
|
141 int idx;
|
|
142
|
|
143 #ifdef HT_DEBUG
|
|
144 ++hash_count_lookup;
|
|
145 #endif
|
|
146
|
|
147 /*
|
|
148 * Quickly handle the most common situations:
|
|
149 * - return if there is no item at all
|
|
150 * - skip over a removed item
|
|
151 * - return if the item matches
|
|
152 */
|
835
|
153 idx = (int)(hash & ht->ht_mask);
|
799
|
154 hi = &ht->ht_array[idx];
|
|
155
|
|
156 if (hi->hi_key == NULL)
|
|
157 return hi;
|
|
158 if (hi->hi_key == HI_KEY_REMOVED)
|
|
159 freeitem = hi;
|
|
160 else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0)
|
|
161 return hi;
|
|
162 else
|
|
163 freeitem = NULL;
|
|
164
|
|
165 /*
|
|
166 * Need to search through the table to find the key. The algorithm
|
|
167 * to step through the table starts with large steps, gradually becoming
|
|
168 * smaller down to (1/4 table size + 1). This means it goes through all
|
|
169 * table entries in the end.
|
|
170 * When we run into a NULL key it's clear that the key isn't there.
|
|
171 * Return the first available slot found (can be a slot of a removed
|
|
172 * item).
|
|
173 */
|
|
174 for (perturb = hash; ; perturb >>= PERTURB_SHIFT)
|
|
175 {
|
|
176 #ifdef HT_DEBUG
|
|
177 ++hash_count_perturb; /* count a "miss" for hashtab lookup */
|
|
178 #endif
|
835
|
179 idx = (int)((idx << 2) + idx + perturb + 1);
|
799
|
180 hi = &ht->ht_array[idx & ht->ht_mask];
|
|
181 if (hi->hi_key == NULL)
|
|
182 return freeitem == NULL ? hi : freeitem;
|
|
183 if (hi->hi_hash == hash
|
|
184 && hi->hi_key != HI_KEY_REMOVED
|
|
185 && STRCMP(hi->hi_key, key) == 0)
|
|
186 return hi;
|
|
187 if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL)
|
|
188 freeitem = hi;
|
|
189 }
|
|
190 }
|
|
191
|
|
/*
 * Print the efficiency of hashtable lookups to stderr.
 * Useful when trying different hash algorithms.
 * Called when exiting.
 * Does nothing unless HT_DEBUG is defined.
 */
void
hash_debug_results()
{
#ifdef HT_DEBUG
    fprintf(stderr, "\r\n\r\n\r\n\r\n");
    fprintf(stderr, "Number of hashtable lookups: %ld\r\n", hash_count_lookup);
    fprintf(stderr, "Number of perturb loops: %ld\r\n", hash_count_perturb);
    /* Report 0% instead of dividing by zero when no lookup was done. */
    fprintf(stderr, "Percentage of perturb loops: %ld%%\r\n",
            hash_count_lookup == 0 ? 0L
                        : hash_count_perturb * 100 / hash_count_lookup);
#endif
}
|
|
208
|
|
209 /*
|
|
210 * Add item with key "key" to hashtable "ht".
|
|
211 * Returns FAIL when out of memory or the key is already present.
|
|
212 */
|
|
213 int
|
|
214 hash_add(ht, key)
|
|
215 hashtab_T *ht;
|
|
216 char_u *key;
|
|
217 {
|
|
218 hash_T hash = hash_hash(key);
|
|
219 hashitem_T *hi;
|
|
220
|
|
221 hi = hash_lookup(ht, key, hash);
|
|
222 if (!HASHITEM_EMPTY(hi))
|
|
223 {
|
|
224 EMSG2(_(e_intern2), "hash_add()");
|
|
225 return FAIL;
|
|
226 }
|
|
227 return hash_add_item(ht, hi, key, hash);
|
|
228 }
|
|
229
|
|
230 /*
|
|
231 * Add item "hi" with "key" to hashtable "ht". "key" must not be NULL and
|
|
232 * "hi" must have been obtained with hash_lookup() and point to an empty item.
|
|
233 * "hi" is invalid after this!
|
|
234 * Returns OK or FAIL (out of memory).
|
|
235 */
|
|
236 int
|
|
237 hash_add_item(ht, hi, key, hash)
|
|
238 hashtab_T *ht;
|
|
239 hashitem_T *hi;
|
|
240 char_u *key;
|
|
241 hash_T hash;
|
|
242 {
|
|
243 /* If resizing failed before and it fails again we can't add an item. */
|
|
244 if (ht->ht_error && hash_may_resize(ht, 0) == FAIL)
|
|
245 return FAIL;
|
|
246
|
|
247 ++ht->ht_used;
|
|
248 if (hi->hi_key == NULL)
|
|
249 ++ht->ht_filled;
|
|
250 hi->hi_key = key;
|
|
251 hi->hi_hash = hash;
|
|
252
|
|
253 /* When the space gets low may resize the array. */
|
|
254 return hash_may_resize(ht, 0);
|
|
255 }
|
|
256
|
|
257 #if 0 /* not used */
|
|
258 /*
|
|
259 * Overwrite hashtable item "hi" with "key". "hi" must point to the item that
|
|
260 * is to be overwritten. Thus the number of items in the hashtable doesn't
|
|
261 * change.
|
|
262 * Although the key must be identical, the pointer may be different, thus it's
|
|
263 * set anyway (the key is part of an item with that key).
|
|
264 * The caller must take care of freeing the old item.
|
|
265 * "hi" is invalid after this!
|
|
266 */
|
|
267 void
|
|
268 hash_set(hi, key)
|
|
269 hashitem_T *hi;
|
|
270 char_u *key;
|
|
271 {
|
|
272 hi->hi_key = key;
|
|
273 }
|
|
274 #endif
|
|
275
|
|
276 /*
|
|
277 * Remove item "hi" from hashtable "ht". "hi" must have been obtained with
|
|
278 * hash_lookup().
|
|
279 * The caller must take care of freeing the item itself.
|
|
280 */
|
|
281 void
|
|
282 hash_remove(ht, hi)
|
|
283 hashtab_T *ht;
|
|
284 hashitem_T *hi;
|
|
285 {
|
|
286 --ht->ht_used;
|
|
287 hi->hi_key = HI_KEY_REMOVED;
|
|
288 hash_may_resize(ht, 0);
|
|
289 }
|
|
290
|
|
291 /*
|
|
292 * Lock a hashtable: prevent that ht_array changes.
|
|
293 * Don't use this when items are to be added!
|
|
294 * Must call hash_unlock() later.
|
|
295 */
|
|
296 void
|
|
297 hash_lock(ht)
|
|
298 hashtab_T *ht;
|
|
299 {
|
|
300 ++ht->ht_locked;
|
|
301 }
|
|
302
|
|
303 #if 0 /* currently not used */
|
|
304 /*
|
|
305 * Lock a hashtable at the specified number of entries.
|
|
306 * Caller must make sure no more than "size" entries will be added.
|
|
307 * Must call hash_unlock() later.
|
|
308 */
|
|
309 void
|
|
310 hash_lock_size(ht, size)
|
|
311 hashtab_T *ht;
|
|
312 int size;
|
|
313 {
|
|
314 (void)hash_may_resize(ht, size);
|
|
315 ++ht->ht_locked;
|
|
316 }
|
|
317 #endif
|
|
318
|
|
319 /*
|
|
320 * Unlock a hashtable: allow ht_array changes again.
|
|
321 * Table will be resized (shrink) when necessary.
|
|
322 * This must balance a call to hash_lock().
|
|
323 */
|
|
324 void
|
|
325 hash_unlock(ht)
|
|
326 hashtab_T *ht;
|
|
327 {
|
|
328 --ht->ht_locked;
|
|
329 (void)hash_may_resize(ht, 0);
|
|
330 }
|
|
331
|
|
332 /*
|
|
333 * Shrink a hashtable when there is too much empty space.
|
|
334 * Grow a hashtable when there is not enough empty space.
|
|
335 * Returns OK or FAIL (out of memory).
|
|
336 */
|
|
337 static int
|
|
338 hash_may_resize(ht, minitems)
|
|
339 hashtab_T *ht;
|
|
340 int minitems; /* minimal number of items */
|
|
341 {
|
|
342 hashitem_T temparray[HT_INIT_SIZE];
|
|
343 hashitem_T *oldarray, *newarray;
|
|
344 hashitem_T *olditem, *newitem;
|
|
345 int newi;
|
|
346 int todo;
|
|
347 long_u oldsize, newsize;
|
|
348 long_u minsize;
|
|
349 long_u newmask;
|
|
350 hash_T perturb;
|
|
351
|
|
352 /* Don't resize a locked table. */
|
|
353 if (ht->ht_locked > 0)
|
|
354 return OK;
|
|
355
|
|
356 #ifdef HT_DEBUG
|
|
357 if (ht->ht_used > ht->ht_filled)
|
|
358 EMSG("hash_may_resize(): more used than filled");
|
|
359 if (ht->ht_filled >= ht->ht_mask + 1)
|
|
360 EMSG("hash_may_resize(): table completely filled");
|
|
361 #endif
|
|
362
|
|
363 if (minitems == 0)
|
|
364 {
|
|
365 /* Return quickly for small tables with at least two NULL items. NULL
|
|
366 * items are required for the lookup to decide a key isn't there. */
|
|
367 if (ht->ht_filled < HT_INIT_SIZE - 1
|
|
368 && ht->ht_array == ht->ht_smallarray)
|
|
369 return OK;
|
|
370
|
|
371 /*
|
|
372 * Grow or refill the array when it's more than 2/3 full (including
|
|
373 * removed items, so that they get cleaned up).
|
|
374 * Shrink the array when it's less than 1/5 full. When growing it is
|
|
375 * at least 1/4 full (avoids repeated grow-shrink operations)
|
|
376 */
|
|
377 oldsize = ht->ht_mask + 1;
|
|
378 if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5)
|
|
379 return OK;
|
|
380
|
|
381 if (ht->ht_used > 1000)
|
|
382 minsize = ht->ht_used * 2; /* it's big, don't make too much room */
|
|
383 else
|
|
384 minsize = ht->ht_used * 4; /* make plenty of room */
|
|
385 }
|
|
386 else
|
|
387 {
|
|
388 /* Use specified size. */
|
|
389 if ((long_u)minitems < ht->ht_used) /* just in case... */
|
835
|
390 minitems = (int)ht->ht_used;
|
799
|
391 minsize = minitems * 3 / 2; /* array is up to 2/3 full */
|
|
392 }
|
|
393
|
|
394 newsize = HT_INIT_SIZE;
|
|
395 while (newsize < minsize)
|
|
396 {
|
|
397 newsize <<= 1; /* make sure it's always a power of 2 */
|
|
398 if (newsize == 0)
|
|
399 return FAIL; /* overflow */
|
|
400 }
|
|
401
|
|
402 if (newsize == HT_INIT_SIZE)
|
|
403 {
|
|
404 /* Use the small array inside the hashdict structure. */
|
|
405 newarray = ht->ht_smallarray;
|
|
406 if (ht->ht_array == newarray)
|
|
407 {
|
|
408 /* Moving from ht_smallarray to ht_smallarray! Happens when there
|
|
409 * are many removed items. Copy the items to be able to clean up
|
|
410 * removed items. */
|
|
411 mch_memmove(temparray, newarray, sizeof(temparray));
|
|
412 oldarray = temparray;
|
|
413 }
|
|
414 else
|
|
415 oldarray = ht->ht_array;
|
|
416 }
|
|
417 else
|
|
418 {
|
|
419 /* Allocate an array. */
|
|
420 newarray = (hashitem_T *)alloc((unsigned)
|
|
421 (sizeof(hashitem_T) * newsize));
|
|
422 if (newarray == NULL)
|
|
423 {
|
|
424 /* Out of memory. When there are NULL items still return OK.
|
|
425 * Otherwise set ht_error, because lookup may result in a hang if
|
|
426 * we add another item. */
|
|
427 if (ht->ht_filled < ht->ht_mask)
|
|
428 return OK;
|
|
429 ht->ht_error = TRUE;
|
|
430 return FAIL;
|
|
431 }
|
|
432 oldarray = ht->ht_array;
|
|
433 }
|
|
434 vim_memset(newarray, 0, (size_t)(sizeof(hashitem_T) * newsize));
|
|
435
|
|
436 /*
|
|
437 * Move all the items from the old array to the new one, placing them in
|
|
438 * the right spot. The new array won't have any removed items, thus this
|
|
439 * is also a cleanup action.
|
|
440 */
|
|
441 newmask = newsize - 1;
|
835
|
442 todo = (int)ht->ht_used;
|
799
|
443 for (olditem = oldarray; todo > 0; ++olditem)
|
|
444 if (!HASHITEM_EMPTY(olditem))
|
|
445 {
|
|
446 /*
|
|
447 * The algorithm to find the spot to add the item is identical to
|
|
448 * the algorithm to find an item in hash_lookup(). But we only
|
|
449 * need to search for a NULL key, thus it's simpler.
|
|
450 */
|
835
|
451 newi = (int)(olditem->hi_hash & newmask);
|
799
|
452 newitem = &newarray[newi];
|
|
453
|
|
454 if (newitem->hi_key != NULL)
|
|
455 for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT)
|
|
456 {
|
835
|
457 newi = (int)((newi << 2) + newi + perturb + 1);
|
799
|
458 newitem = &newarray[newi & newmask];
|
|
459 if (newitem->hi_key == NULL)
|
|
460 break;
|
|
461 }
|
|
462 *newitem = *olditem;
|
|
463 --todo;
|
|
464 }
|
|
465
|
|
466 if (ht->ht_array != ht->ht_smallarray)
|
|
467 vim_free(ht->ht_array);
|
|
468 ht->ht_array = newarray;
|
|
469 ht->ht_mask = newmask;
|
|
470 ht->ht_filled = ht->ht_used;
|
|
471 ht->ht_error = FALSE;
|
|
472
|
|
473 return OK;
|
|
474 }
|
|
475
|
|
476 /*
|
|
477 * Get the hash number for a key.
|
|
478 * If you think you know a better hash function: Compile with HT_DEBUG set and
|
|
479 * run a script that uses hashtables a lot. Vim will then print statistics
|
|
480 * when exiting. Try that with the current hash algorithm and yours. The
|
|
481 * lower the percentage the better.
|
|
482 */
|
|
483 hash_T
|
|
484 hash_hash(key)
|
|
485 char_u *key;
|
|
486 {
|
|
487 hash_T hash;
|
|
488 char_u *p;
|
|
489
|
|
490 if ((hash = *key) == 0)
|
|
491 return (hash_T)0; /* Empty keys are not allowed, but we don't
|
|
492 want to crash if we get one. */
|
|
493 p = key + 1;
|
|
494
|
|
495 #if 0
|
|
496 /* ElfHash algorithm, which is supposed to have an even distribution.
|
|
497 * Suggested by Charles Campbell. */
|
|
498 hash_T g;
|
|
499
|
|
500 while (*p != NUL)
|
|
501 {
|
|
502 hash = (hash << 4) + *p++; /* clear low 4 bits of hash, add char */
|
|
503 g = hash & 0xf0000000L; /* g has high 4 bits of hash only */
|
|
504 if (g != 0)
|
|
505 hash ^= g >> 24; /* xor g's high 4 bits into hash */
|
|
506 }
|
|
507 #else
|
|
508
|
|
509 /* A simplistic algorithm that appears to do very well.
|
|
510 * Suggested by George Reilly. */
|
|
511 while (*p != NUL)
|
|
512 hash = hash * 101 + *p++;
|
|
513 #endif
|
|
514
|
|
515 return hash;
|
|
516 }
|
|
517
|
|
518 #endif
|