#include "mm_hash.h"
#include <string.h>
#include <stdlib.h>

/*
 * Simple hash table implementation --- Knuth calls this open hashing
 * with linear probes.  I keep the load factor (the fraction of table
 * elements actually filled) below 3/4, so that the average number of
 * probes on a hit is 2.5, and on a miss is 7.5, according to Knuth's
 * formulae (if the hash function does its job).  Roughly half of the
 * space taken up by the entries goes to empties, on average; however,
 * with the more usual linked-list chaining, some of that space would
 * be going into linked list pointers and heap overhead, and we'd get
 * more heap fragmentation and less cache locality.  In any case, the
 * probes are cheap (we compare the hash codes before going to memcmp),
 * and this is slightly easier to code.
 *
 * One drawback to this method is that deleting one element can cause
 * some of the others to move around, but rehashing can cause that to
 * happen as well (the whole array gets copied).  Don't save pointers
 * to elements across any modification to the table.
 *
 * The iteration code is carefully written to allow you to delete the
 * current element in an iteration without ill effects (it visits the
 * elements in an order that insures that the reshuffled elements, if
 * any, will be ones that you've already seen and will not see again).
 * But if you insert something, all bets are off, since the table may
 * be rehashed.  If not, you still might or might not see the element
 * you added.
 *
 * The code is designed to work with Ralf Engelschall's MM library, a
 * generic interface to shared-memory facilities on quite a few posix-
 * like systems.  However, it assumes that its callers will take care
 * of any necessary locking (you have to do that anyway for iteration
 * at least, to keep system integrity).
 *
 * copyright 2000, Robert S. Thau
 */

static unsigned long hash (const char *string, int len)
{
    unsigned long hashcode = 0;

    /* Same hash function as in tcl --- for each character,
     *
     * h[n+1] = 9 * h[n] + c[n+1]
     *
     * with a moderately sleazy trick to avoid actually multiplying.
     * One extra wrinkle --- we often see C strings, which include the
     * terminating NUL for our purposes here.  If we hashed that with
     * everything else, then all hash codes for C strings would be
     * multiples of nine, which is obviously not good; we kludge
     * around that by ignoring NUL characters altogether.
     */

    while (len-- > 0) {
	char c = *string++;
	if (c != '\0') hashcode += (hashcode << 3) + c;
    }

#ifdef BOGUS_DEBUGGING_KLUDGE
    /* it's useful to be able to predict hashcodes while debugging
     * table maintenance...
     */
    hashcode = (unsigned long)atoi (string); 
#endif    
    return hashcode;
}

int mm_hash_empty (mm_hash_table *t) {
    return t->nelts == t->nfree;
}

mm_hash_table *mm_hash_create (int initial_nelts,
			       int own_data,
			       void *alloc_cookie,
			       void *(*alloc_func)(void *cookie, int size),
			       void (*free_func)(void *cookie, void *mem))
{
    int i;    
    mm_hash_table *new_table =
	(mm_hash_table *)alloc_func (alloc_cookie, sizeof(mm_hash_table));
    
    if (new_table == NULL) return NULL;

    if (initial_nelts < MM_HASH_REAL_MIN_NELTS)
	initial_nelts = MM_HASH_REAL_MIN_NELTS;
    
    new_table->elts =
	(mm_hash_elt *)alloc_func (alloc_cookie,
				   initial_nelts * sizeof(mm_hash_elt));

    if (new_table->elts == NULL) {
	free_func (alloc_cookie, (void *)new_table);
	return NULL;
    }

    for (i = 0; i < initial_nelts; ++i) {
	new_table->elts[i].key = new_table->elts[i].data = NULL;
    }

    new_table->nelts = initial_nelts;
    new_table->nfree = initial_nelts;
    new_table->alloc_cookie = alloc_cookie;
    new_table->alloc_func = alloc_func;
    new_table->free_func = free_func;
    new_table->own_data = own_data;
    return new_table;
}

void mm_hash_clear (mm_hash_table *t)
{
    int i;
    void (*free_func)(void *, void *) = t->free_func;
    void *cookie = t->alloc_cookie;

    for (i = 0; i < t->nelts; ++i) {
	mm_hash_elt *elt = &t->elts[i];

	if (elt->key) {
	    free_func (cookie, elt->key);
	    elt->key = NULL;
	}
	if (elt->data) {
	    if (t->own_data) free_func (cookie, elt->data);
	    elt->data = NULL;
	}
    }

    t->nfree = t->nelts;
}

void mm_hash_destroy (mm_hash_table *t)
{
    int i;
    void (*free_func)(void *, void *) = t->free_func;
    void *cookie = t->alloc_cookie;

    for (i = 0; i < t->nelts; ++i) {
	mm_hash_elt *elt = &t->elts[i];

	if (elt->key) free_func (cookie, elt->key);
	if (t->own_data && elt->data) free_func (cookie, elt->data);
    }

    t->alloc_func = NULL;	/* Break ptrs so reuse will fault */
    t->free_func = NULL;
    t->elts = NULL;
    
    free_func (cookie, t->elts);
    free_func (cookie, t);
}

int mm_hash_rebuild (mm_hash_table *t, int new_nelts)
{
    int i;
    mm_hash_elt *new_elts;
    
    /* Ensure enough room in new tbl */
    
    if (new_nelts < t->nelts - t->nfree) return 0;

    /* Allocate and clear new element list */
    
    new_elts = (mm_hash_elt *) t->alloc_func (t->alloc_cookie,
					      new_nelts * sizeof(mm_hash_elt));

    if (new_elts == NULL) return 0; /* failure */

    for (i = 0; i < new_nelts; ++i) {
	new_elts[i].key = new_elts[i].data = NULL;
    }

    /* Re-hash existing elements into new array */

    for (i = 0; i < t->nelts; ++i) {
	mm_hash_elt *elt = &t->elts[i];
	
	if (elt->key != NULL) {
	    int new_index = elt->fullhash % new_nelts;

	    while (new_elts[new_index].key != NULL) {
		++new_index;
		if (new_index == new_nelts) new_index = 0;
	    }
	    
	    new_elts[new_index] = *elt;
	}
    }

    /* Install new array, free old one, and re-do accounting */

    t->free_func (t->alloc_cookie, t->elts);
    t->elts = new_elts;
    t->nfree += (new_nelts - t->nelts);
    t->nelts = new_nelts;
    return 1;
}

mm_hash_elt *mm_hash_get (mm_hash_table *t, void *key,
			  int keylen, int *created_p)
{
    unsigned long fullhash = hash (key, keylen);
    int i = fullhash % t->nelts;

    if (created_p != NULL) *created_p = 0; /* Assume not created... */
    
    /* There must be at least one free element in t->elts.  We keep
     * going through the table until we either find our key, or find
     * a free element...
     */
    
    while (1) {
	/* Is this element free? */
	if (t->elts[i].key == NULL) break;

	/* Is it a match? */
	if (t->elts[i].fullhash == fullhash
	    && t->elts[i].keylen == keylen
	    && !memcmp (t->elts[i].key, key, keylen))
	{
	    break;
	}
	
	/* No mas... try next element */
	if (++i == t->nelts) i = 0;
    }

    /* If we found a match, return it */
    
    if (t->elts[i].key != NULL) return &t->elts[i];

    /* No match.  If not creating, return failure */

    if (created_p == NULL) return NULL;

    /* We are creating a new element.  Rehash if the load factor
     * is above threshold...
     */

    if ((t->nfree - 1) * MM_HASH_LOAD_LIMIT < t->nelts) {
	/* Must rehash.  If that succeeds, try again... */
	
	if (!mm_hash_rebuild (t, t->nelts * 2))	
	    return NULL;		/* failure */
	else
	    return mm_hash_get (t, key, keylen, created_p);
    }

    /* Didn't need to rehash... and we have the element we need to clobber */

    t->elts[i].key = t->alloc_func (t->alloc_cookie, keylen);

    if (t->elts[i].key == NULL) return NULL; /* failure */

    memcpy (t->elts[i].key, key, keylen);
    t->elts[i].keylen = keylen;
    t->elts[i].fullhash = fullhash;
    t->elts[i].data = NULL;
    --t->nfree;
    if (created_p != NULL) *created_p = 1;
    return &t->elts[i];
}

int mm_hash_elt_set (mm_hash_table *t, mm_hash_elt *elt,
		     void *data, int datalen)
{
    void *data_copy = NULL;

    if (!t->own_data) return 0;	/* Shouldn't be calling this at all */
    
    if (elt->data != NULL) t->free_func (t->alloc_cookie, elt->data);
    elt->data = NULL;
    
    if (data != NULL) {
	data_copy = t->alloc_func (t->alloc_cookie, datalen);
	if (data_copy == NULL) return 0; /* failure */
	memcpy (data_copy, data, datalen);
    }

    elt->data = data_copy;
    elt->datalen = datalen;
    return 1;			/* success */
}

int mm_hash_elt_delete (mm_hash_table *t, mm_hash_elt *elt)
{
    int region_size, deleted_elt_idx, current_elt_idx;
    
    if (t->own_data && elt->data != NULL)
	t->free_func (t->alloc_cookie, elt->data);
    
    if (elt->key != NULL)
	t->free_func (t->alloc_cookie, elt->key);

    elt->data = NULL;
    elt->key  = NULL;
    ++t->nfree;
    
    /* And now we need to scan elements past this one, to see if
     * any of them were placed there due to a collision with this
     * element, or something before it on the probe sequence, and
     * would be orphaned if not moved.
     */

    region_size = 0;
    deleted_elt_idx = current_elt_idx = elt - t->elts;

    while (1) {
	int empty_posn, elts_before_current;
	
	++region_size;
	if (++current_elt_idx == t->nelts) current_elt_idx = 0;

	if (t->elts[current_elt_idx].key == NULL)
	    break;
	
	empty_posn = t->elts[current_elt_idx].fullhash % t->nelts;
	elts_before_current = current_elt_idx - empty_posn;

	if (elts_before_current < 0)
	    elts_before_current += t->nelts;

	if (elts_before_current >= region_size) {
	    t->elts[deleted_elt_idx] = t->elts[current_elt_idx];
	    t->elts[current_elt_idx].data = NULL;
	    t->elts[current_elt_idx].key = NULL;
	    deleted_elt_idx = current_elt_idx;
	    region_size = 0;
	}
    }
    
    /* We always succeed, or claim to... */
    
    return 1;
}

/*
 * Iteration functions --- note that we go in the opposite order from
 * the probe sequence so that, if the caller deletes an element, the
 * one that gets moved on top of it by the deletion routine, if any,
 * will be one that we've already seen.  To avoid edge effects, we
 * start off with an element immediately preceding an empty.
 */

void mm_hash_iter_init (mm_hash_table *t, mm_hash_iter *iter)
{
    int i;
    
    /* Search for the first empty */

    for (i = 0; i < t->nelts; ++i)
	if (t->elts[i].key == NULL)
	    break;

    /* Back up one, and start there */

    iter->start_idx = i;
    iter->next_idx = i;
}

mm_hash_elt *mm_hash_iter_next (mm_hash_table *t, mm_hash_iter *iter)
{
    /* Search backwards --- in the opposite order from the chaining
     * probes --- for the next nonempty element
     */
    
    while (1) {
	if (--iter->next_idx < 0) iter->next_idx = t->nelts - 1;
	if (iter->next_idx == iter->start_idx) return NULL;
	
	if (t->elts[iter->next_idx].key != NULL)
	    return &t->elts[iter->next_idx];
    }
}