Mercurial > vim
annotate src/hashtab.c @ 7897:43071dab7621 v7.4.1245
commit https://github.com/vim/vim/commit/d087566a419cc107adab77db997b184ea0e433ad
Author: Bram Moolenaar <Bram@vim.org>
Date: Tue Feb 2 20:52:42 2016 +0100
patch 7.4.1245
Problem: File missing from distribution.
Solution: Add json_test.c.
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Tue, 02 Feb 2016 21:00:08 +0100 |
parents | bcef391c101c |
children | 9fa567d13551 |
rev | line source |
---|---|
799 | 1 /* vi:set ts=8 sts=4 sw=4: |
2 * | |
3 * VIM - Vi IMproved by Bram Moolenaar | |
4 * | |
5 * Do ":help uganda" in Vim to read copying and usage conditions. | |
6 * Do ":help credits" in Vim to see a list of people who contributed. | |
7 * See README.txt for an overview of the Vim source code. | |
8 */ | |
9 | |
10 /* | |
11 * hashtab.c: Handling of a hashtable with Vim-specific properties. | |
12 * | |
13 * Each item in a hashtable has a NUL terminated string key. A key can appear | |
14 * only once in the table. | |
15 * | |
16 * A hash number is computed from the key for quick lookup. When the hashes | |
17 * of two different keys point to the same entry an algorithm is used to | |
18 * iterate over other entries in the table until the right one is found. | |
19 * To make the iteration work removed keys are different from entries where a | |
20 * key was never present. | |
21 * | |
22 * The mechanism has been partly based on how Python Dictionaries are | |
23 * implemented. The algorithm is from Knuth Vol. 3, Sec. 6.4. | |
24 * | |
25 * The hashtable grows to accommodate more entries when needed. At least 1/3 | |
26 * of the entries is empty to keep the lookup efficient (at the cost of extra | |
27 * memory). | |
28 */ | |
29 | |
30 #include "vim.h" | |
31 | |
32 #if defined(FEAT_EVAL) || defined(FEAT_SYN_HL) || defined(PROTO) | |
33 | |
34 #if 0 | |
35 # define HT_DEBUG /* extra checks for table consistency and statistics */ | |
36 | |
37 static long hash_count_lookup = 0; /* count number of hashtab lookups */ | |
38 static long hash_count_perturb = 0; /* count number of "misses" */ | |
39 #endif | |
40 | |
41 /* Magic value for algorithm that walks through the array. */ | |
42 #define PERTURB_SHIFT 5 | |
43 | |
7803
37c929c4a073
commit https://github.com/vim/vim/commit/92b8b2d307e34117f146319872010b0ccc9d2713
Christian Brabandt <cb@256bit.org>
parents:
3970
diff
changeset
|
44 static int hash_may_resize(hashtab_T *ht, int minitems); |
799 | 45 |
46 #if 0 /* currently not used */ | |
47 /* | |
48 * Create an empty hash table. | |
49 * Returns NULL when out of memory. | |
50 */ | |
51 hashtab_T * | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
52 hash_create(void) |
799 | 53 { |
54 hashtab_T *ht; | |
55 | |
56 ht = (hashtab_T *)alloc(sizeof(hashtab_T)); | |
57 if (ht != NULL) | |
58 hash_init(ht); | |
59 return ht; | |
60 } | |
61 #endif | |
62 | |
63 /* | |
64 * Initialize an empty hash table. | |
65 */ | |
66 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
67 hash_init(hashtab_T *ht) |
799 | 68 { |
69 /* This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray". */ | |
70 vim_memset(ht, 0, sizeof(hashtab_T)); | |
71 ht->ht_array = ht->ht_smallarray; | |
72 ht->ht_mask = HT_INIT_SIZE - 1; | |
73 } | |
74 | |
75 /* | |
76 * Free the array of a hash table. Does not free the items it contains! | |
77 * If "ht" is not freed then you should call hash_init() next! | |
78 */ | |
79 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
80 hash_clear(hashtab_T *ht) |
799 | 81 { |
82 if (ht->ht_array != ht->ht_smallarray) | |
83 vim_free(ht->ht_array); | |
84 } | |
85 | |
86 /* | |
87 * Free the array of a hash table and all the keys it contains. The keys must | |
88 * have been allocated. "off" is the offset from the start of the allocate | |
89 * memory to the location of the key (it's always positive). | |
90 */ | |
91 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
92 hash_clear_all(hashtab_T *ht, int off) |
799 | 93 { |
835 | 94 long todo; |
799 | 95 hashitem_T *hi; |
96 | |
835 | 97 todo = (long)ht->ht_used; |
799 | 98 for (hi = ht->ht_array; todo > 0; ++hi) |
99 { | |
100 if (!HASHITEM_EMPTY(hi)) | |
101 { | |
102 vim_free(hi->hi_key - off); | |
103 --todo; | |
104 } | |
105 } | |
106 hash_clear(ht); | |
107 } | |
108 | |
109 /* | |
110 * Find "key" in hashtable "ht". "key" must not be NULL. | |
111 * Always returns a pointer to a hashitem. If the item was not found then | |
112 * HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key | |
113 * would be added. | |
114 * WARNING: The returned pointer becomes invalid when the hashtable is changed | |
115 * (adding, setting or removing an item)! | |
116 */ | |
117 hashitem_T * | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
118 hash_find(hashtab_T *ht, char_u *key) |
799 | 119 { |
120 return hash_lookup(ht, key, hash_hash(key)); | |
121 } | |
122 | |
123 /* | |
124 * Like hash_find(), but caller computes "hash". | |
125 */ | |
126 hashitem_T * | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
127 hash_lookup(hashtab_T *ht, char_u *key, hash_T hash) |
799 | 128 { |
129 hash_T perturb; | |
130 hashitem_T *freeitem; | |
131 hashitem_T *hi; | |
3970 | 132 unsigned idx; |
799 | 133 |
134 #ifdef HT_DEBUG | |
135 ++hash_count_lookup; | |
136 #endif | |
137 | |
138 /* | |
139 * Quickly handle the most common situations: | |
140 * - return if there is no item at all | |
141 * - skip over a removed item | |
142 * - return if the item matches | |
143 */ | |
3970 | 144 idx = (unsigned)(hash & ht->ht_mask); |
799 | 145 hi = &ht->ht_array[idx]; |
146 | |
147 if (hi->hi_key == NULL) | |
148 return hi; | |
149 if (hi->hi_key == HI_KEY_REMOVED) | |
150 freeitem = hi; | |
151 else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0) | |
152 return hi; | |
153 else | |
154 freeitem = NULL; | |
155 | |
156 /* | |
157 * Need to search through the table to find the key. The algorithm | |
158 * to step through the table starts with large steps, gradually becoming | |
159 * smaller down to (1/4 table size + 1). This means it goes through all | |
160 * table entries in the end. | |
161 * When we run into a NULL key it's clear that the key isn't there. | |
162 * Return the first available slot found (can be a slot of a removed | |
163 * item). | |
164 */ | |
165 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) | |
166 { | |
167 #ifdef HT_DEBUG | |
168 ++hash_count_perturb; /* count a "miss" for hashtab lookup */ | |
169 #endif | |
3970 | 170 idx = (unsigned)((idx << 2U) + idx + perturb + 1U); |
799 | 171 hi = &ht->ht_array[idx & ht->ht_mask]; |
172 if (hi->hi_key == NULL) | |
173 return freeitem == NULL ? hi : freeitem; | |
174 if (hi->hi_hash == hash | |
175 && hi->hi_key != HI_KEY_REMOVED | |
176 && STRCMP(hi->hi_key, key) == 0) | |
177 return hi; | |
178 if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL) | |
179 freeitem = hi; | |
180 } | |
181 } | |
182 | |
183 /* | |
184 * Print the efficiency of hashtable lookups. | |
185 * Useful when trying different hash algorithms. | |
186 * Called when exiting. | |
187 */ | |
188 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
189 hash_debug_results(void) |
799 | 190 { |
191 #ifdef HT_DEBUG | |
192 fprintf(stderr, "\r\n\r\n\r\n\r\n"); | |
193 fprintf(stderr, "Number of hashtable lookups: %ld\r\n", hash_count_lookup); | |
194 fprintf(stderr, "Number of perturb loops: %ld\r\n", hash_count_perturb); | |
195 fprintf(stderr, "Percentage of perturb loops: %ld%%\r\n", | |
196 hash_count_perturb * 100 / hash_count_lookup); | |
197 #endif | |
198 } | |
199 | |
200 /* | |
201 * Add item with key "key" to hashtable "ht". | |
202 * Returns FAIL when out of memory or the key is already present. | |
203 */ | |
204 int | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
205 hash_add(hashtab_T *ht, char_u *key) |
799 | 206 { |
207 hash_T hash = hash_hash(key); | |
208 hashitem_T *hi; | |
209 | |
210 hi = hash_lookup(ht, key, hash); | |
211 if (!HASHITEM_EMPTY(hi)) | |
212 { | |
213 EMSG2(_(e_intern2), "hash_add()"); | |
214 return FAIL; | |
215 } | |
216 return hash_add_item(ht, hi, key, hash); | |
217 } | |
218 | |
219 /* | |
220 * Add item "hi" with "key" to hashtable "ht". "key" must not be NULL and | |
221 * "hi" must have been obtained with hash_lookup() and point to an empty item. | |
222 * "hi" is invalid after this! | |
223 * Returns OK or FAIL (out of memory). | |
224 */ | |
225 int | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
226 hash_add_item( |
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
227 hashtab_T *ht, |
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
228 hashitem_T *hi, |
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
229 char_u *key, |
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
230 hash_T hash) |
799 | 231 { |
232 /* If resizing failed before and it fails again we can't add an item. */ | |
233 if (ht->ht_error && hash_may_resize(ht, 0) == FAIL) | |
234 return FAIL; | |
235 | |
236 ++ht->ht_used; | |
237 if (hi->hi_key == NULL) | |
238 ++ht->ht_filled; | |
239 hi->hi_key = key; | |
240 hi->hi_hash = hash; | |
241 | |
242 /* When the space gets low may resize the array. */ | |
243 return hash_may_resize(ht, 0); | |
244 } | |
245 | |
246 #if 0 /* not used */ | |
247 /* | |
248 * Overwrite hashtable item "hi" with "key". "hi" must point to the item that | |
249 * is to be overwritten. Thus the number of items in the hashtable doesn't | |
250 * change. | |
251 * Although the key must be identical, the pointer may be different, thus it's | |
252 * set anyway (the key is part of an item with that key). | |
253 * The caller must take care of freeing the old item. | |
254 * "hi" is invalid after this! | |
255 */ | |
256 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
257 hash_set(hashitem_T *hi, char_u *key) |
799 | 258 { |
259 hi->hi_key = key; | |
260 } | |
261 #endif | |
262 | |
263 /* | |
264 * Remove item "hi" from hashtable "ht". "hi" must have been obtained with | |
265 * hash_lookup(). | |
266 * The caller must take care of freeing the item itself. | |
267 */ | |
268 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
269 hash_remove(hashtab_T *ht, hashitem_T *hi) |
799 | 270 { |
271 --ht->ht_used; | |
272 hi->hi_key = HI_KEY_REMOVED; | |
273 hash_may_resize(ht, 0); | |
274 } | |
275 | |
276 /* | |
277 * Lock a hashtable: prevent that ht_array changes. | |
278 * Don't use this when items are to be added! | |
279 * Must call hash_unlock() later. | |
280 */ | |
281 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
282 hash_lock(hashtab_T *ht) |
799 | 283 { |
284 ++ht->ht_locked; | |
285 } | |
286 | |
287 #if 0 /* currently not used */ | |
288 /* | |
289 * Lock a hashtable at the specified number of entries. | |
290 * Caller must make sure no more than "size" entries will be added. | |
291 * Must call hash_unlock() later. | |
292 */ | |
293 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
294 hash_lock_size(hashtab_T *ht, int size) |
799 | 295 { |
296 (void)hash_may_resize(ht, size); | |
297 ++ht->ht_locked; | |
298 } | |
299 #endif | |
300 | |
301 /* | |
302 * Unlock a hashtable: allow ht_array changes again. | |
303 * Table will be resized (shrink) when necessary. | |
304 * This must balance a call to hash_lock(). | |
305 */ | |
306 void | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
307 hash_unlock(hashtab_T *ht) |
799 | 308 { |
309 --ht->ht_locked; | |
310 (void)hash_may_resize(ht, 0); | |
311 } | |
312 | |
313 /* | |
314 * Shrink a hashtable when there is too much empty space. | |
315 * Grow a hashtable when there is not enough empty space. | |
316 * Returns OK or FAIL (out of memory). | |
317 */ | |
318 static int | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
319 hash_may_resize( |
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
320 hashtab_T *ht, |
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
321 int minitems) /* minimal number of items */ |
799 | 322 { |
323 hashitem_T temparray[HT_INIT_SIZE]; | |
324 hashitem_T *oldarray, *newarray; | |
325 hashitem_T *olditem, *newitem; | |
3970 | 326 unsigned newi; |
799 | 327 int todo; |
328 long_u oldsize, newsize; | |
329 long_u minsize; | |
330 long_u newmask; | |
331 hash_T perturb; | |
332 | |
333 /* Don't resize a locked table. */ | |
334 if (ht->ht_locked > 0) | |
335 return OK; | |
336 | |
337 #ifdef HT_DEBUG | |
338 if (ht->ht_used > ht->ht_filled) | |
339 EMSG("hash_may_resize(): more used than filled"); | |
340 if (ht->ht_filled >= ht->ht_mask + 1) | |
341 EMSG("hash_may_resize(): table completely filled"); | |
342 #endif | |
343 | |
344 if (minitems == 0) | |
345 { | |
346 /* Return quickly for small tables with at least two NULL items. NULL | |
347 * items are required for the lookup to decide a key isn't there. */ | |
348 if (ht->ht_filled < HT_INIT_SIZE - 1 | |
349 && ht->ht_array == ht->ht_smallarray) | |
350 return OK; | |
351 | |
352 /* | |
353 * Grow or refill the array when it's more than 2/3 full (including | |
354 * removed items, so that they get cleaned up). | |
355 * Shrink the array when it's less than 1/5 full. When growing it is | |
356 * at least 1/4 full (avoids repeated grow-shrink operations) | |
357 */ | |
358 oldsize = ht->ht_mask + 1; | |
359 if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5) | |
360 return OK; | |
361 | |
362 if (ht->ht_used > 1000) | |
363 minsize = ht->ht_used * 2; /* it's big, don't make too much room */ | |
364 else | |
365 minsize = ht->ht_used * 4; /* make plenty of room */ | |
366 } | |
367 else | |
368 { | |
369 /* Use specified size. */ | |
370 if ((long_u)minitems < ht->ht_used) /* just in case... */ | |
835 | 371 minitems = (int)ht->ht_used; |
799 | 372 minsize = minitems * 3 / 2; /* array is up to 2/3 full */ |
373 } | |
374 | |
375 newsize = HT_INIT_SIZE; | |
376 while (newsize < minsize) | |
377 { | |
378 newsize <<= 1; /* make sure it's always a power of 2 */ | |
379 if (newsize == 0) | |
380 return FAIL; /* overflow */ | |
381 } | |
382 | |
383 if (newsize == HT_INIT_SIZE) | |
384 { | |
385 /* Use the small array inside the hashdict structure. */ | |
386 newarray = ht->ht_smallarray; | |
387 if (ht->ht_array == newarray) | |
388 { | |
389 /* Moving from ht_smallarray to ht_smallarray! Happens when there | |
390 * are many removed items. Copy the items to be able to clean up | |
391 * removed items. */ | |
392 mch_memmove(temparray, newarray, sizeof(temparray)); | |
393 oldarray = temparray; | |
394 } | |
395 else | |
396 oldarray = ht->ht_array; | |
397 } | |
398 else | |
399 { | |
400 /* Allocate an array. */ | |
401 newarray = (hashitem_T *)alloc((unsigned) | |
402 (sizeof(hashitem_T) * newsize)); | |
403 if (newarray == NULL) | |
404 { | |
405 /* Out of memory. When there are NULL items still return OK. | |
406 * Otherwise set ht_error, because lookup may result in a hang if | |
407 * we add another item. */ | |
408 if (ht->ht_filled < ht->ht_mask) | |
409 return OK; | |
410 ht->ht_error = TRUE; | |
411 return FAIL; | |
412 } | |
413 oldarray = ht->ht_array; | |
414 } | |
415 vim_memset(newarray, 0, (size_t)(sizeof(hashitem_T) * newsize)); | |
416 | |
417 /* | |
418 * Move all the items from the old array to the new one, placing them in | |
419 * the right spot. The new array won't have any removed items, thus this | |
420 * is also a cleanup action. | |
421 */ | |
422 newmask = newsize - 1; | |
835 | 423 todo = (int)ht->ht_used; |
799 | 424 for (olditem = oldarray; todo > 0; ++olditem) |
425 if (!HASHITEM_EMPTY(olditem)) | |
426 { | |
427 /* | |
428 * The algorithm to find the spot to add the item is identical to | |
429 * the algorithm to find an item in hash_lookup(). But we only | |
430 * need to search for a NULL key, thus it's simpler. | |
431 */ | |
3970 | 432 newi = (unsigned)(olditem->hi_hash & newmask); |
799 | 433 newitem = &newarray[newi]; |
434 | |
435 if (newitem->hi_key != NULL) | |
436 for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT) | |
437 { | |
3970 | 438 newi = (unsigned)((newi << 2U) + newi + perturb + 1U); |
799 | 439 newitem = &newarray[newi & newmask]; |
440 if (newitem->hi_key == NULL) | |
441 break; | |
442 } | |
443 *newitem = *olditem; | |
444 --todo; | |
445 } | |
446 | |
447 if (ht->ht_array != ht->ht_smallarray) | |
448 vim_free(ht->ht_array); | |
449 ht->ht_array = newarray; | |
450 ht->ht_mask = newmask; | |
451 ht->ht_filled = ht->ht_used; | |
452 ht->ht_error = FALSE; | |
453 | |
454 return OK; | |
455 } | |
456 | |
457 /* | |
458 * Get the hash number for a key. | |
459 * If you think you know a better hash function: Compile with HT_DEBUG set and | |
460 * run a script that uses hashtables a lot. Vim will then print statistics | |
461 * when exiting. Try that with the current hash algorithm and yours. The | |
462 * lower the percentage the better. | |
463 */ | |
464 hash_T | |
7823
bcef391c101c
commit https://github.com/vim/vim/commit/68c2f638e65d914dc6e84eb7ce2624f08af525c0
Christian Brabandt <cb@256bit.org>
parents:
7803
diff
changeset
|
465 hash_hash(char_u *key) |
799 | 466 { |
467 hash_T hash; | |
468 char_u *p; | |
469 | |
470 if ((hash = *key) == 0) | |
471 return (hash_T)0; /* Empty keys are not allowed, but we don't | |
472 want to crash if we get one. */ | |
473 p = key + 1; | |
474 | |
475 /* A simplistic algorithm that appears to do very well. | |
476 * Suggested by George Reilly. */ | |
477 while (*p != NUL) | |
478 hash = hash * 101 + *p++; | |
479 | |
480 return hash; | |
481 } | |
482 | |
483 #endif |