#include "mm_hash.h" #include <string.h> #include <stdlib.h> /* * Simple hash table implementation --- Knuth calls this open hashing * with linear probes. I keep the load factor (the fraction of table * elements actually filled) below 3/4, so that the average number of * probes on a hit is 2.5, and on a miss is 7.5, according to Knuth's * formulae (if the hash function does its job). Roughly half of the * space taken up by the entries goes to empties, on average; however, * with the more usual linked-list chaining, some of that space would * be going into linked list pointers and heap overhead, and we'd get * more heap fragmentation and less cache locality. In any case, the * probes are cheap (we compare the hash codes before going to memcmp), * and this is slightly easier to code. * * One drawback to this method is that deleting one element can cause * some of the others to move around, but rehashing can cause that to * happen as well (the whole array gets copied). Don't save pointers * to elements across any modification to the table. * * The iteration code is carefully written to allow you to delete the * current element in an iteration without ill effects (it visits the * elements in an order that insures that the reshuffled elements, if * any, will be ones that you've already seen and will not see again). * But if you insert something, all bets are off, since the table may * be rehashed. If not, you still might or might not see the element * you added. * * The code is designed to work with Ralf Engelschall's MM library, a * generic interface to shared-memory facilities on quite a few posix- * like systems. However, it assumes that its callers will take care * of any necessary locking (you have to do that anyway for iteration * at least, to keep system integrity). * * copyright 2000, Robert S. Thau */ static unsigned long hash (const char *string, int len) { unsigned long hashcode = 0; /* Same hash function as in tcl --- for each character, * * h[n+1] = 9 * h[n] + c[n+1] * * with a moderately sleazy trick to avoid actually multiplying. * One extra wrinkle --- we often see C strings, which include the * terminating NUL for our purposes here. If we hashed that with * everything else, then all hash codes for C strings would be * multiples of nine, which is obviously not good; we kludge * around that by ignoring NUL characters altogether. */ while (len-- > 0) { char c = *string++; if (c != '\0') hashcode += (hashcode << 3) + c; } #ifdef BOGUS_DEBUGGING_KLUDGE /* it's useful to be able to predict hashcodes while debugging * table maintenance... */ hashcode = (unsigned long)atoi (string); #endif return hashcode; } int mm_hash_empty (mm_hash_table *t) { return t->nelts == t->nfree; } mm_hash_table *mm_hash_create (int initial_nelts, int own_data, void *alloc_cookie, void *(*alloc_func)(void *cookie, int size), void (*free_func)(void *cookie, void *mem)) { int i; mm_hash_table *new_table = (mm_hash_table *)alloc_func (alloc_cookie, sizeof(mm_hash_table)); if (new_table == NULL) return NULL; if (initial_nelts < MM_HASH_REAL_MIN_NELTS) initial_nelts = MM_HASH_REAL_MIN_NELTS; new_table->elts = (mm_hash_elt *)alloc_func (alloc_cookie, initial_nelts * sizeof(mm_hash_elt)); if (new_table->elts == NULL) { free_func (alloc_cookie, (void *)new_table); return NULL; } for (i = 0; i < initial_nelts; ++i) { new_table->elts[i].key = new_table->elts[i].data = NULL; } new_table->nelts = initial_nelts; new_table->nfree = initial_nelts; new_table->alloc_cookie = alloc_cookie; new_table->alloc_func = alloc_func; new_table->free_func = free_func; new_table->own_data = own_data; return new_table; } void mm_hash_clear (mm_hash_table *t) { int i; void (*free_func)(void *, void *) = t->free_func; void *cookie = t->alloc_cookie; for (i = 0; i < t->nelts; ++i) { mm_hash_elt *elt = &t->elts[i]; if (elt->key) { free_func (cookie, elt->key); elt->key = NULL; } if (elt->data) { if (t->own_data) free_func (cookie, elt->data); elt->data = NULL; } } t->nfree = t->nelts; } void mm_hash_destroy (mm_hash_table *t) { int i; void (*free_func)(void *, void *) = t->free_func; void *cookie = t->alloc_cookie; for (i = 0; i < t->nelts; ++i) { mm_hash_elt *elt = &t->elts[i]; if (elt->key) free_func (cookie, elt->key); if (t->own_data && elt->data) free_func (cookie, elt->data); } t->alloc_func = NULL; /* Break ptrs so reuse will fault */ t->free_func = NULL; t->elts = NULL; free_func (cookie, t->elts); free_func (cookie, t); } int mm_hash_rebuild (mm_hash_table *t, int new_nelts) { int i; mm_hash_elt *new_elts; /* Ensure enough room in new tbl */ if (new_nelts < t->nelts - t->nfree) return 0; /* Allocate and clear new element list */ new_elts = (mm_hash_elt *) t->alloc_func (t->alloc_cookie, new_nelts * sizeof(mm_hash_elt)); if (new_elts == NULL) return 0; /* failure */ for (i = 0; i < new_nelts; ++i) { new_elts[i].key = new_elts[i].data = NULL; } /* Re-hash existing elements into new array */ for (i = 0; i < t->nelts; ++i) { mm_hash_elt *elt = &t->elts[i]; if (elt->key != NULL) { int new_index = elt->fullhash % new_nelts; while (new_elts[new_index].key != NULL) { ++new_index; if (new_index == new_nelts) new_index = 0; } new_elts[new_index] = *elt; } } /* Install new array, free old one, and re-do accounting */ t->free_func (t->alloc_cookie, t->elts); t->elts = new_elts; t->nfree += (new_nelts - t->nelts); t->nelts = new_nelts; return 1; } mm_hash_elt *mm_hash_get (mm_hash_table *t, void *key, int keylen, int *created_p) { unsigned long fullhash = hash (key, keylen); int i = fullhash % t->nelts; if (created_p != NULL) *created_p = 0; /* Assume not created... */ /* There must be at least one free element in t->elts. We keep * going through the table until we either find our key, or find * a free element... */ while (1) { /* Is this element free? */ if (t->elts[i].key == NULL) break; /* Is it a match? */ if (t->elts[i].fullhash == fullhash && t->elts[i].keylen == keylen && !memcmp (t->elts[i].key, key, keylen)) { break; } /* No mas... try next element */ if (++i == t->nelts) i = 0; } /* If we found a match, return it */ if (t->elts[i].key != NULL) return &t->elts[i]; /* No match. If not creating, return failure */ if (created_p == NULL) return NULL; /* We are creating a new element. Rehash if the load factor * is above threshold... */ if ((t->nfree - 1) * MM_HASH_LOAD_LIMIT < t->nelts) { /* Must rehash. If that succeeds, try again... */ if (!mm_hash_rebuild (t, t->nelts * 2)) return NULL; /* failure */ else return mm_hash_get (t, key, keylen, created_p); } /* Didn't need to rehash... and we have the element we need to clobber */ t->elts[i].key = t->alloc_func (t->alloc_cookie, keylen); if (t->elts[i].key == NULL) return NULL; /* failure */ memcpy (t->elts[i].key, key, keylen); t->elts[i].keylen = keylen; t->elts[i].fullhash = fullhash; t->elts[i].data = NULL; --t->nfree; if (created_p != NULL) *created_p = 1; return &t->elts[i]; } int mm_hash_elt_set (mm_hash_table *t, mm_hash_elt *elt, void *data, int datalen) { void *data_copy = NULL; if (!t->own_data) return 0; /* Shouldn't be calling this at all */ if (elt->data != NULL) t->free_func (t->alloc_cookie, elt->data); elt->data = NULL; if (data != NULL) { data_copy = t->alloc_func (t->alloc_cookie, datalen); if (data_copy == NULL) return 0; /* failure */ memcpy (data_copy, data, datalen); } elt->data = data_copy; elt->datalen = datalen; return 1; /* success */ } int mm_hash_elt_delete (mm_hash_table *t, mm_hash_elt *elt) { int region_size, deleted_elt_idx, current_elt_idx; if (t->own_data && elt->data != NULL) t->free_func (t->alloc_cookie, elt->data); if (elt->key != NULL) t->free_func (t->alloc_cookie, elt->key); elt->data = NULL; elt->key = NULL; ++t->nfree; /* And now we need to scan elements past this one, to see if * any of them were placed there due to a collision with this * element, or something before it on the probe sequence, and * would be orphaned if not moved. */ region_size = 0; deleted_elt_idx = current_elt_idx = elt - t->elts; while (1) { int empty_posn, elts_before_current; ++region_size; if (++current_elt_idx == t->nelts) current_elt_idx = 0; if (t->elts[current_elt_idx].key == NULL) break; empty_posn = t->elts[current_elt_idx].fullhash % t->nelts; elts_before_current = current_elt_idx - empty_posn; if (elts_before_current < 0) elts_before_current += t->nelts; if (elts_before_current >= region_size) { t->elts[deleted_elt_idx] = t->elts[current_elt_idx]; t->elts[current_elt_idx].data = NULL; t->elts[current_elt_idx].key = NULL; deleted_elt_idx = current_elt_idx; region_size = 0; } } /* We always succeed, or claim to... */ return 1; } /* * Iteration functions --- note that we go in the opposite order from * the probe sequence so that, if the caller deletes an element, the * one that gets moved on top of it by the deletion routine, if any, * will be one that we've already seen. To avoid edge effects, we * start off with an element immediately preceding an empty. */ void mm_hash_iter_init (mm_hash_table *t, mm_hash_iter *iter) { int i; /* Search for the first empty */ for (i = 0; i < t->nelts; ++i) if (t->elts[i].key == NULL) break; /* Back up one, and start there */ iter->start_idx = i; iter->next_idx = i; } mm_hash_elt *mm_hash_iter_next (mm_hash_table *t, mm_hash_iter *iter) { /* Search backwards --- in the opposite order from the chaining * probes --- for the next nonempty element */ while (1) { if (--iter->next_idx < 0) iter->next_idx = t->nelts - 1; if (iter->next_idx == iter->start_idx) return NULL; if (t->elts[iter->next_idx].key != NULL) return &t->elts[iter->next_idx]; } }