# HG changeset patch # User vimboss # Date 1106173275 0 # Node ID 34423a71d203cea82361c07d90199944eea2f674 # Parent e8f07016e34d01c93db3d6e9a28aac51e274134b updated for version 7.0042 diff --git a/src/Make_w16.mak b/src/Make_w16.mak --- a/src/Make_w16.mak +++ b/src/Make_w16.mak @@ -87,6 +87,7 @@ ObjFiles = \ $(INTDIR)\fileio.obj\ $(INTDIR)\fold.obj\ $(INTDIR)\getchar.obj\ + $(INTDIR)\hashtable.obj\ $(INTDIR)\gui.obj\ $(INTDIR)\gui_w16.obj\ $(INTDIR)\main.obj\ diff --git a/src/hashtable.c b/src/hashtable.c new file mode 100644 --- /dev/null +++ b/src/hashtable.c @@ -0,0 +1,393 @@ +/* vi:set ts=8 sts=4 sw=4: + * + * VIM - Vi IMproved by Bram Moolenaar + * + * Do ":help uganda" in Vim to read copying and usage conditions. + * Do ":help credits" in Vim to see a list of people who contributed. + * See README.txt for an overview of the Vim source code. + */ + +/* + * hashtable.c: Handling of a hashtable with Vim-specific properties. + * + * Each item in a hashtable has a NUL terminated string key. A key can appear + * only once in the table. + * + * A hash number is computed from the key for quick lookup. When the hashes + * of two different keys point to the same entry an algorithm is used to + * iterate over other entries in the table until the right one is found. + * To make the iteration work removed keys are different from entries where a + * key was never present. + * + * The mechanism has been partly based on how Python Dictionaries are + * implemented. The algorithm is from Knuth Vol. 3, Sec. 6.4. + * + * The hashtable grows to accommodate more entries when needed. At least 1/3 + * of the entries is empty to keep the lookup efficient (at the cost of extra + * memory). + */ + +#include "vim.h" + +#if defined(FEAT_EVAL) || defined(FEAT_SYN_HL) || defined(PROTO) + +#if 1 +# define HT_DEBUG /* extra checks for table consistency */ +#endif + +/* Magic value for algorithm that walks through the array. */ +#define PERTURB_SHIFT 5 + +static hashitem *hash_lookup __ARGS((hashtable *ht, char_u *key, long_u hash)); +static int hash_add_item __ARGS((hashtable *ht, hashitem *hi, char_u *key, long_u hash)); +static int hash_may_resize __ARGS((hashtable *ht)); +static long_u hash_hash __ARGS((char_u *key)); + +#if 0 /* not used */ +/* + * Create an empty hash table. + * Returns NULL when out of memory. + */ + hashtable * +hash_create() +{ + hashtable *ht; + + ht = (hashtable *)alloc(sizeof(hashtable)); + if (ht != NULL) + hash_init(ht); + return ht; +} +#endif + +/* + * Initialize an empty hash table. + */ + void +hash_init(ht) + hashtable *ht; +{ + /* This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray". */ + vim_memset(ht, 0, sizeof(hashtable)); + ht->ht_array = ht->ht_smallarray; + ht->ht_mask = HT_INIT_SIZE - 1; +} + +#if 0 /* not used */ +/* + * Free a hash table. Does not free the items it contains! + */ + void +hash_free(ht) + hashtable *ht; +{ + if (ht->ht_array != ht->ht_smallarray) + vim_free(ht->ht_array); + vim_free(ht); +} +#endif + +/* + * Find "key" in hashtable "ht". "key" must not be NULL. + * Always returns a pointer to a hashitem. If the item was not found then + * HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key + * would be added. + * WARNING: The returned pointer becomes invalid when the hashtable is changed + * (adding, setting or removing an item)! + */ + hashitem * +hash_find(ht, key) + hashtable *ht; + char_u *key; +{ + return hash_lookup(ht, key, hash_hash(key)); +} + +/* + * Like hash_find(), but caller computes "hash". + */ + static hashitem * +hash_lookup(ht, key, hash) + hashtable *ht; + char_u *key; + long_u hash; +{ + long_u perturb; + hashitem *freeitem; + hashitem *hi; + int idx; + + /* + * Quickly handle the most common situations: + * - return if there is no item at all + * - skip over a removed item + * - return if the item matches + */ + idx = hash & ht->ht_mask; + hi = &ht->ht_array[idx]; + + if (hi->hi_key == NULL) + return hi; + if (hi->hi_key == HI_KEY_REMOVED) + freeitem = hi; + else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0) + return hi; + else + freeitem = NULL; + + /* + * Need to search through the table to find the key. The algorithm + * to step through the table starts with large steps, gradually becoming + * smaller down to (1/4 table size + 1). This means it goes through all + * table entries in the end. + * When we run into a NULL key it's clear that the key isn't there. + * Return the first available slot found (can be a slot of a removed + * item). + */ + for (perturb = hash; ; perturb >>= PERTURB_SHIFT) + { + idx = (idx << 2) + idx + perturb + 1; + hi = &ht->ht_array[idx & ht->ht_mask]; + if (hi->hi_key == NULL) + return freeitem == NULL ? hi : freeitem; + if (hi->hi_hash == hash + && hi->hi_key != HI_KEY_REMOVED + && STRCMP(hi->hi_key, key) == 0) + return hi; + if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL) + freeitem = hi; + } +} + +/* + * Add item with key "key" to hashtable "ht". + * Returns FAIL when out of memory or the key is already present. + */ + int +hash_add(ht, key) + hashtable *ht; + char_u *key; +{ + long_u hash = hash_hash(key); + hashitem *hi; + + hi = hash_lookup(ht, key, hash); + if (!HASHITEM_EMPTY(hi)) + { + EMSG2(_(e_intern2), "hash_add()"); + return FAIL; + } + return hash_add_item(ht, hi, key, hash); +} + +/* + * Add item "hi" with "key" to hashtable "ht". "key" must not be NULL and + * "hi" must have been obtained with hash_lookup() and point to an empty item. + * "hi" is invalid after this! + * Returns OK or FAIL (out of memory). + */ + static int +hash_add_item(ht, hi, key, hash) + hashtable *ht; + hashitem *hi; + char_u *key; + long_u hash; +{ + /* If resizing failed before and it fails again we can't add an item. */ + if (ht->ht_error && hash_may_resize(ht) == FAIL) + return FAIL; + + ++ht->ht_used; + if (hi->hi_key == NULL) + ++ht->ht_filled; + hi->hi_key = key; + hi->hi_hash = hash; + + /* When the space gets low may resize the array. */ + return hash_may_resize(ht); +} + +#if 0 /* not used */ +/* + * Overwrite hashtable item "hi" with "key". "hi" must point to the item that + * is to be overwritten. Thus the number of items in the hashtable doesn't + * change. + * Although the key must be identical, the pointer may be different, thus it's + * set anyway (the key is part of an item with that key). + * The caller must take care of freeing the old item. + * "hi" is invalid after this! + */ + void +hash_set(hi, key) + hashitem *hi; + char_u *key; +{ + hi->hi_key = key; +} +#endif + +/* + * Remove item "hi" from hashtable "ht". "hi" must have been obtained with + * hash_lookup() and point to a used empty item. + * The caller must take care of freeing the item. + */ + void +hash_remove(ht, hi) + hashtable *ht; + hashitem *hi; +{ + --ht->ht_used; + hi->hi_key = HI_KEY_REMOVED; + hash_may_resize(ht); +} + +/* + * Shrink a hashtable when there is too much empty space. + * Grow a hashtable when there is not enough empty space. + * Returns OK or FAIL (out of memory). + */ + static int +hash_may_resize(ht) + hashtable *ht; +{ + hashitem temparray[HT_INIT_SIZE]; + hashitem *oldarray, *newarray; + hashitem *olditem, *newitem; + int newi; + int todo; + long_u oldsize, newsize; + long_u minsize; + long_u newmask; + long_u perturb; + +#ifdef HT_DEBUG + if (ht->ht_used > ht->ht_filled) + EMSG("hash_may_resize(): more used than filled"); + if (ht->ht_filled >= ht->ht_mask + 1) + EMSG("hash_may_resize(): table completely filled"); +#endif + + /* Return quickly for small tables with at least two NULL items. NULL + * items are required for the lookup to decide a key isn't there. */ + if (ht->ht_filled < HT_INIT_SIZE - 1 && ht->ht_array == ht->ht_smallarray) + return OK; + + /* + * Grow or refill the array when it's more than 2/3 full (including + * removed items, so that they get cleaned up). + * Shrink the array when it's less than 1/5 full. When growing it is at + * least 1/4 full (avoids repeated grow-shrink operations) + */ + oldsize = ht->ht_mask + 1; + if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5) + return OK; + + if (ht->ht_used > 10000) + minsize = ht->ht_used * 2; /* it's big, don't make too much room */ + else + minsize = ht->ht_used * 4; /* make plenty of room */ + newsize = HT_INIT_SIZE; + while (newsize < minsize) + { + newsize <<= 1; /* make sure it's always a power of 2 */ + if (newsize == 0) + return FAIL; /* overflow */ + } + + if (newsize == HT_INIT_SIZE) + { + /* Use the small array inside the hashdict structure. */ + newarray = ht->ht_smallarray; + if (ht->ht_array == newarray) + { + /* Moving from ht_smallarray to ht_smallarray! Happens when there + * are many removed items. Copy the items to be able to clean up + * removed items. */ + mch_memmove(temparray, newarray, sizeof(temparray)); + oldarray = temparray; + } + else + oldarray = ht->ht_array; + } + else + { + /* Allocate an array. */ + newarray = (hashitem *)alloc((unsigned)(sizeof(hashitem) * newsize)); + if (newarray == NULL) + { + /* Out of memory. When there are NULL items still return OK. + * Otherwise set ht_error, because lookup may result in a hang if + * we add another item. */ + if (ht->ht_filled < ht->ht_mask) + return OK; + ht->ht_error = TRUE; + return FAIL; + } + oldarray = ht->ht_array; + } + vim_memset(newarray, 0, (size_t)(sizeof(hashitem) * newsize)); + + /* + * Move all the items from the old array to the new one, placing them in + * the right spot. The new array won't have any removed items, thus this + * is also a cleanup action. + */ + newmask = newsize - 1; + todo = ht->ht_used; + for (olditem = oldarray; todo > 0; ++olditem) + if (olditem->hi_key != NULL && olditem->hi_key != HI_KEY_REMOVED) + { + /* + * The algorithm to find the spot to add the item is identical to + * the algorithm to find an item in hash_lookup(). But we only + * need to search for a NULL key, thus it's simpler. + */ + newi = olditem->hi_hash & newmask; + newitem = &newarray[newi]; + + if (newitem->hi_key != NULL) + for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT) + { + newi = (newi << 2) + newi + perturb + 1; + newitem = &newarray[newi & newmask]; + if (newitem->hi_key == NULL) + break; + } + *newitem = *olditem; + --todo; + } + + if (ht->ht_array != ht->ht_smallarray) + vim_free(ht->ht_array); + ht->ht_array = newarray; + ht->ht_mask = newmask; + ht->ht_filled = ht->ht_used; + ht->ht_error = FALSE; + + return OK; +} + +/* + * Get the hash number for a key. Uses the ElfHash algorithm, which is + * supposed to have an even distribution (suggested by Charles Campbell). + */ + static long_u +hash_hash(key) + char_u *key; +{ + long_u hash = 0; + long_u g; + char_u *p = key; + + while (*p != NUL) + { + hash = (hash << 4) + *p++; /* clear low 4 bits of hash, add char */ + g = hash & 0xf0000000L; /* g has high 4 bits of hash only */ + if (g != 0) + hash ^= g >> 24; /* xor g's high 4 bits into hash */ + } + + return hash; +} + +#endif diff --git a/src/proto/hashtable.pro b/src/proto/hashtable.pro new file mode 100644 --- /dev/null +++ b/src/proto/hashtable.pro @@ -0,0 +1,6 @@ +/* hashtable.c */ +void hash_init __ARGS((hashtable *ht)); +hashitem *hash_find __ARGS((hashtable *ht, char_u *key)); +int hash_add __ARGS((hashtable *ht, char_u *key)); +void hash_remove __ARGS((hashtable *ht, hashitem *hi)); +/* vim: set ft=c : */ diff --git a/src/testdir/Makefile b/src/testdir/Makefile --- a/src/testdir/Makefile +++ b/src/testdir/Makefile @@ -14,7 +14,7 @@ SCRIPTS = test1.out test2.out test3.out test38.out test39.out test40.out test41.out test42.out \ test43.out test44.out test45.out test46.out test47.out \ test48.out test49.out test51.out test52.out test53.out \ - test54.out + test54.out test55.out SCRIPTS_GUI = test16.out