Getting rid of hash_table_t and its helper functions completely. Bye bye hash_table_t!

This commit is contained in:
Siteshwar Vashisht 2012-02-18 21:21:10 +05:30
parent 412894bfc8
commit c0ed169fdc
2 changed files with 1 additions and 653 deletions

488
util.cpp
View file

@ -1,7 +1,7 @@
/** \file util.c /** \file util.c
Generic utilities library. Generic utilities library.
Contains datastructures such as hash tables, automatically growing array lists, priority queues, etc. Contains datastructures such as automatically growing array lists, priority queues, etc.
*/ */
#include "config.h" #include "config.h"
@ -36,11 +36,6 @@
*/ */
#define MIN_SIZE 32 #define MIN_SIZE 32
/**
Minimum size for hash tables. hash_realloc never allocates an array
with fewer slots than this, and it skips a shrink request when the
table is already smaller than this.
*/
#define HASH_MIN_SIZE 7
/** /**
Maximum number of characters that can be inserted using a single Maximum number of characters that can be inserted using a single
call to sb_printf. This is needed since vswprintf doesn't tell us call to sb_printf. This is needed since vswprintf doesn't tell us
@ -100,487 +95,6 @@ int maxi( int a,
return a>b?a:b; return a>b?a:b;
} }
/* Hash table functions */
/**
   Initialize a hash table and pre-allocate its slot array.

   The slot count starts at 32 and is doubled until it is no smaller
   than capacity*4/3; one is then subtracted, so the final size has
   the form 2^n-1.

   All bookkeeping fields are set before the allocation is attempted.
   If malloc fails, oom_handler is called and the table is left in the
   empty state (no array, size 0, count 0) rather than partially
   uninitialized.

   \param h the table to initialize
   \param hash_func function computing the hash value of a key
   \param compare_func function returning non-zero when two keys are equal
   \param capacity the number of elements the caller expects to store
*/
void hash_init2( hash_table_t *h,
                 int (*hash_func)(void *key),
                 int (*compare_func)(void *key1, void *key2),
                 size_t capacity)
{
    size_t sz = 32;

    while( sz < (capacity*4/3) )
        sz*=2;
    /*
      Make sure the size is a Mersenne number. Should hopefully be a
      reasonably good size with regard to avoiding patterns of collisions.
    */
    sz--;

    /* Establish a valid empty table first, so that a failed allocation
       does not leave any field uninitialized. */
    h->arr = 0;
    h->size = 0;
    h->count = 0;
    h->hash_func = hash_func;
    h->compare_func = compare_func;
    h->cache = -1;

    h->arr = (hash_struct_t *)malloc( sizeof(hash_struct_t)*sz );
    if( !h->arr )
    {
        oom_handler( h );
        return;
    }

    h->size = sz;
    /* Only the keys are cleared; a key of 0 marks a slot as empty. */
    for( size_t i=0; i< sz; i++ )
        h->arr[i].key = 0;
}
/**
   Initialize a hash table without allocating any storage.

   The table starts with no array, a size of 0 and a count of 0, and
   the one-entry lookup cache is marked invalid (-1).

   \param h the table to initialize
   \param hash_func function computing the hash value of a key
   \param compare_func function returning non-zero when two keys are equal
*/
void hash_init( hash_table_t *h,
                int (*hash_func)(void *key),
                int (*compare_func)(void *key1, void *key2) )
{
    /* Callbacks supplied by the caller */
    h->compare_func = compare_func;
    h->hash_func = hash_func;

    /* Empty-table bookkeeping */
    h->cache = -1;
    h->count = 0;
    h->size = 0;
    h->arr = 0;
}
/**
   Release the slot array owned by the table.

   Only the array itself is freed; the key and data pointers stored in
   it are not. After the call the table is reset to the empty state
   (no array, size 0, count 0, cache invalid), so destroying it a second
   time is harmless. A null table pointer is ignored.

   \param h the table to destroy
*/
void hash_destroy( hash_table_t *h )
{
    if( !h )
        return;

    free( h->arr );

    /* Do not leave a dangling pointer or stale bookkeeping behind. */
    h->arr = 0;
    h->size = 0;
    h->count = 0;
    h->cache = -1;
}
/**
   Locate the slot belonging to the specified key.

   The one-entry cache is consulted first. Otherwise probing starts at
   the key's hash value modulo the table size and advances one slot at
   a time, wrapping around, until it reaches either an empty slot or a
   slot whose key compares equal. The slot found is remembered in the
   cache.

   The table size must be non-zero (it is used as a divisor), and the
   probe only terminates if the key is present or at least one slot is
   empty.

   \return index of the key in the table, or the first free index on its probe path if the key is not in the table
*/
static int hash_search( hash_table_t *h,
                        void *key )
{
    int slot;

    /* Fast path: same key as the previous lookup */
    if( h->cache >= 0 )
    {
        void *cached_key = h->arr[h->cache].key;
        if( cached_key && h->compare_func( cached_key, key ) )
        {
            return h->cache;
        }
    }

    /* Mask off the sign bit so the modulo result is never negative */
    slot = (h->hash_func( key ) & 0x7fffffff) % h->size;

    for( ;; slot = (slot + 1) % h->size )
    {
        void *candidate = h->arr[slot].key;
        if( candidate == 0 || h->compare_func( candidate, key ) )
        {
            h->cache = slot;
            return slot;
        }
    }
}
/**
   Reallocate the hash array. This is quite expensive, as every single
   entry has to be rehashed and moved.

   A request to shrink a table that is already smaller than
   HASH_MIN_SIZE is ignored, and the new size is never less than
   HASH_MIN_SIZE. The lookup cache is invalidated.

   The new array is obtained with calloc, which both zeroes the slots
   and guards the element-count multiplication against overflow. If the
   allocation fails, the table keeps its old array and size and
   oom_handler is called.

   \param h the table to resize
   \param sz the requested number of slots
   \return 1 on success (including the ignored-shrink case), 0 if the allocation failed
*/
static int hash_realloc( hash_table_t *h,
                         int sz )
{
    hash_struct_t *old_arr;
    hash_struct_t *new_arr;
    int old_size;
    int i;

    /* Avoid reallocating when using pathetically small tables */
    if( ( sz < h->size ) && (h->size < HASH_MIN_SIZE))
        return 1;
    sz = maxi( sz, HASH_MIN_SIZE );

    old_arr = h->arr;
    old_size = h->size;

    h->cache = -1;

    new_arr = (hash_struct_t *)calloc( (size_t)sz, sizeof( hash_struct_t ) );
    if( new_arr == 0 )
    {
        /* h->arr was never touched, so the old contents stay usable */
        oom_handler( h );
        return 0;
    }

    h->arr = new_arr;
    h->size = sz;

    /* Re-insert every live entry; hash_search probes the new array */
    for( i=0; i<old_size; i++ )
    {
        if( old_arr[i].key != 0 )
        {
            int pos = hash_search( h, old_arr[i].key );
            h->arr[pos].key = old_arr[i].key;
            h->arr[pos].data = old_arr[i].data;
        }
    }
    free( old_arr );

    return 1;
}
/**
   Set the key/value pair for the hashtable.

   The table is grown first if it has no storage yet or if adding one
   more element would make it more than 75% full. If the key is already
   present its stored key and data pointers are overwritten; otherwise
   the element count is incremented. Only the pointers are stored,
   nothing is copied.

   A key of 0 cannot be stored meaningfully, because a slot whose key
   is 0 is treated as empty.

   \param h the table
   \param key the key
   \param data the value to associate with the key
   \return 1 on success, 0 if the table needed to grow and the reallocation failed
*/
int hash_put( hash_table_t *h,
              const void *key,
              const void *data )
{
    int pos;

    /*
      A table set up by hash_init has size 0. Test for that explicitly
      instead of relying on a floating point division by zero.
    */
    if( h->size == 0 || (float)(h->count+1)/h->size > 0.75f )
    {
        if( !hash_realloc( h, (h->size+1) * 2 -1 ) )
        {
            return 0;
        }
    }

    pos = hash_search( h, (void *)key );

    /* An empty slot means this is a new key, not a replacement */
    if( h->arr[pos].key == 0 )
    {
        h->count++;
    }

    h->arr[pos].key = (void *)key;
    h->arr[pos].data = (void *)data;
    return 1;
}
/**
   Look up the data stored for the specified key.

   An empty table is answered without searching.

   \param h the table
   \param key the key to look up
   \return the data pointer stored with the key, or 0 if the key is not in the table
*/
void *hash_get( hash_table_t *h,
                const void *key )
{
    int slot;

    if( h->count == 0 )
        return 0;

    slot = hash_search( h, (void *)key );

    /* hash_search lands on an empty slot when the key is absent */
    return ( h->arr[slot].key != 0 ) ? h->arr[slot].data : 0;
}
/**
   Look up the key pointer that the table itself stores for a key
   comparing equal to the specified one.

   \param h the table
   \param key the key to look up
   \return the stored key pointer, or 0 if no such key is in the table
*/
void *hash_get_key( hash_table_t *h,
                    const void *key )
{
    int slot;

    if( h->count == 0 )
        return 0;

    slot = hash_search( h, (void *)key );

    /* An empty slot holds a key of 0, which is also the not-found result */
    return h->arr[slot].key;
}
/**
   Report how many key/data pairs the table currently holds.

   \param h the table
   \return the table's element count
*/
int hash_get_count( hash_table_t *h)
{
    const int stored = h->count;
    return stored;
}
/**
   Remove the specified key from the hash table if it exists. Do
   nothing (apart from clearing the output pointers) if it does not.

   After the slot is emptied, the entries that follow it in the probe
   sequence are examined up to the next empty slot, and any entry that
   would sit closer to its ideal position in the vacated slot is moved
   there, so later lookups still reach it. Finally the table is shrunk
   when it has become sparsely filled.

   \param h The hashtable
   \param key The key
   \param old_key If not 0, receives the stored key pointer, or 0 if the key was not found
   \param old_val If not 0, receives the stored data pointer, or 0 if the key was not found
*/
void hash_remove( hash_table_t *h,
                  const void *key,
                  void **old_key,
                  void **old_val )
{
    int hole = -1;
    int scan;

    /* Only search a non-empty table; -1 means "not found" */
    if( h->count )
    {
        hole = hash_search( h, (void *)key );
        if( h->arr[hole].key == 0 )
            hole = -1;
    }

    if( hole < 0 )
    {
        if( old_key != 0 )
            *old_key = 0;
        if( old_val != 0 )
            *old_val = 0;
        return;
    }

    h->count--;

    if( old_key != 0 )
        *old_key = h->arr[hole].key;
    if( old_val != 0 )
        *old_val = h->arr[hole].data;

    h->arr[hole].key = 0;

    /* Walk the rest of the cluster and pull entries back into the hole */
    for( scan = (hole + 1) % h->size;
         h->arr[scan].key != 0;
         scan = (scan + 1) % h->size )
    {
        int hv = h->hash_func( h->arr[scan].key );
        int ideal_pos = ( hv & 0x7fffffff) % h->size;
        int dist_old = (scan - ideal_pos + h->size)%h->size;
        int dist_new = (hole - ideal_pos + h->size)%h->size;

        if( dist_new < dist_old )
        {
            h->arr[hole].key = h->arr[scan].key;
            h->arr[hole].data = h->arr[scan].data;
            h->arr[scan].key = 0;
            hole = scan;
        }
    }

    /* Shrink to roughly half the size once the table is mostly empty */
    if( (float)(h->count+1)/h->size < 0.2f && h->count < 63 )
    {
        hash_realloc( h, (h->size+1) / 2 -1 );
    }
}
/**
   Check whether the specified key is in the hash table.

   \param h the table
   \param key the key to look for
   \return 1 if the key is present, 0 otherwise
*/
int hash_contains( hash_table_t *h,
                   const void *key )
{
    if( h->count )
    {
        int slot = hash_search( h, (void *)key );
        if( h->arr[slot].key != 0 )
            return 1;
    }
    return 0;
}
/**
Push hash value into array_list_t
*/
static void hash_put_data( void *key,
void *data,
void *al )
{
al_push( (array_list_t *)al,
data );
}
/**
   Append the data pointer of every entry in the table to the
   specified list.

   \param h the table
   \param arr the list that receives the data pointers
*/
void hash_get_data( hash_table_t *h,
                    array_list_t *arr )
{
    /* hash_put_data pushes each entry's data onto the list given as aux */
    hash_foreach2( h, hash_put_data, arr );
}
/**
Push hash key into array_list_t
*/
static void hash_put_key( void *key, void *data, void *al )
{
al_push( (array_list_t *)al, key );
}
/**
   Append the key pointer of every entry in the table to the
   specified list.

   \param h the table
   \param arr the list that receives the key pointers
*/
void hash_get_keys( hash_table_t *h,
                    array_list_t *arr )
{
    /* hash_put_key pushes each entry's key onto the list given as aux */
    hash_foreach2( h, hash_put_key, arr );
}
/**
   Call the function func once for each key/data pair in the table,
   visiting the slots in array order and skipping empty ones.

   \param h the table
   \param func callback receiving the key and the data of each entry
*/
void hash_foreach( hash_table_t *h,
                   void (*func)( void *, void *) )
{
    int slot = 0;

    while( slot < h->size )
    {
        hash_struct_t *entry = &h->arr[slot++];

        if( entry->key != 0 )
        {
            func( entry->key, entry->data );
        }
    }
}
/**
   Same as hash_foreach, but func takes an additional argument which
   is passed through unchanged from aux.

   \param h the table
   \param func callback receiving the key, the data and aux for each entry
   \param aux caller-supplied value handed to every call of func
*/
void hash_foreach2( hash_table_t *h,
                    void (*func)( void *, void *, void * ),
                    void *aux )
{
    int slot = 0;

    while( slot < h->size )
    {
        hash_struct_t *entry = &h->arr[slot++];

        if( entry->key != 0 )
        {
            func( entry->key, entry->data, aux );
        }
    }
}
/**
   Helper function for hash_wcs_func: combine the input shifted left
   by 1 bit with the input shifted right by 31 bits, i.e. a left
   rotation by one bit of a 32-bit value.
*/
static unsigned int rotl1( unsigned int in )
{
    const unsigned int shifted_up = in << 1;
    const unsigned int wrapped = in >> 31;

    return shifted_up | wrapped;
}
/**
   Helper function for hash_wcs_func: combine the input shifted left
   by 5 bits with the input shifted right by 27 bits, i.e. a left
   rotation by five bits of a 32-bit value.
*/
static unsigned int rotl5( unsigned int in )
{
    const unsigned int shifted_up = in << 5;
    const unsigned int wrapped = in >> 27;

    return shifted_up | wrapped;
}
/**
   Helper function for hash_wcs_func: combine the input shifted left
   by 30 bits with the input shifted right by 2 bits, i.e. a left
   rotation by thirty bits of a 32-bit value.
*/
static unsigned int rotl30( unsigned int in )
{
    const unsigned int shifted_up = in << 30;
    const unsigned int wrapped = in >> 2;

    return shifted_up | wrapped;
}
/**
   The number of words of input used in each lap by the sha-like
   string hashing algorithm.
*/
#define WORD_COUNT 16

/**
   Hash a zero-terminated wide character string with a cut-down,
   sha1-like mixing function.

   The string is consumed WORD_COUNT characters at a time (the last
   chunk is padded with zeroes), each chunk is expanded to
   2*WORD_COUNT words, and the words are mixed into five state
   variables which are finally xor-ed together.

   \param data the wide character string to hash, or 0
   \return the hash value; 0 if data is 0
*/
int hash_wcs_func( void *data )
{
    const wchar_t *cursor = (const wchar_t *)data;
    const unsigned int k0 = 0x5a827999u;
    const unsigned int k1 = 0x6ed9eba1u;
    unsigned int w[2*WORD_COUNT];
    int i;
    int t;

    /*
      Same constants used by sha1
    */
    unsigned int a = 0x67452301u;
    unsigned int b = 0xefcdab89u;
    unsigned int c = 0x98badcfeu;
    unsigned int d = 0x10325476u;
    unsigned int e = 0xc3d2e1f0u;

    if( data == 0 )
        return 0;

    while( *cursor )
    {
        /*
          Read WORD_COUNT words of data into w. Once the terminator
          is reached the cursor stays on it, so the remaining words
          are filled with zeroes.
        */
        for( i=0; i<WORD_COUNT; i++ )
        {
            if( *cursor )
                w[i] = *cursor++;
            else
                w[i] = 0;
        }

        /*
          And fill up the rest by rotating the previous content
        */
        for( i=WORD_COUNT; i<(2*WORD_COUNT); i++ )
        {
            w[i]=rotl1(w[i-1]^w[i-(WORD_COUNT/2)]^w[i-(WORD_COUNT/2-1)]^w[i-WORD_COUNT]);
        }

        /*
          Only 2*WORD_COUNT laps, not 80 like in sha1. Only two types
          of laps, not 4 like in sha1: the first WORD_COUNT laps use
          xor mixing with k0, the remaining ones use majority mixing
          with k1.
        */
        for( t=0; t<(2*WORD_COUNT); t++ )
        {
            unsigned int mix;
            unsigned int round_key;
            unsigned int temp;

            if( t < WORD_COUNT )
            {
                mix = b^c^d;
                round_key = k0;
            }
            else
            {
                mix = (b&c)|(b&d)|(c&d);
                round_key = k1;
            }

            temp = (rotl5(a)+mix+e+w[t]+round_key);
            e=d;
            d=c;
            c=rotl30(b);
            b=a;
            a=temp;
        }
    }

    /*
      Implode from 160 to 32 bit hash and return
    */
    return a^b^c^d^e;
}
/**
   Hash comparison function suitable for wide character strings.

   \param a first zero-terminated wide character string
   \param b second zero-terminated wide character string
   \return 1 if wcscmp reports the strings as equal, 0 otherwise
*/
int hash_wcs_cmp( void *a, void *b )
{
    const wchar_t *lhs = (const wchar_t *)a;
    const wchar_t *rhs = (const wchar_t *)b;

    return wcscmp( lhs, rhs ) == 0;
}
/**
   Hash comparison function suitable for character strings.

   \param a first zero-terminated string
   \param b second zero-terminated string
   \return 1 if strcmp reports the strings as equal, 0 otherwise
*/
int hash_str_cmp( void *a, void *b )
{
    const char *lhs = (const char *)a;
    const char *rhs = (const char *)b;

    return strcmp( lhs, rhs ) == 0;
}
/**
   Hash function suitable for character strings.

   Starting from a fixed seed, each character is folded in by rotating
   the running value left by five bits, multiplying by 18499 and
   xor-ing with the character.

   A null pointer hashes to 0, matching hash_wcs_func, instead of being
   dereferenced.

   \param data the zero-terminated string to hash, or 0
   \return the hash value; 0 if data is 0
*/
int hash_str_func( void *data )
{
    int res = 0x67452301u;
    const char *str = (const char *)data;

    if( str == 0 )
        return 0;

    while( *str )
        res = (18499*rotl5(res)) ^ *str++;

    return res;
}
/**
   Hash function for keys that are compared by pointer identity: the
   pointer value itself, converted to long and then to int, is the
   hash.

   \param data the pointer to hash
   \return the pointer value converted to int
*/
int hash_ptr_func( void *data )
{
    long as_long = (long) data;

    return (int) as_long;
}
/**
   Hash comparison function suitable for direct pointer comparison.

   \param a first pointer
   \param b second pointer
   \return 1 if both arguments are the same pointer, 0 otherwise
*/
int hash_ptr_cmp( void *a,
                  void *b )
{
    if( a != b )
        return 0;

    return 1;
}
/** /**
Real implementation of all al_push_* versions. Pushes arbitrary Real implementation of all al_push_* versions. Pushes arbitrary
element to end of list. element to end of list.

166
util.h
View file

@ -42,50 +42,6 @@ typedef union
} }
anything_t; anything_t;
/**
Internal struct used by hash_table_t: one slot of the table's array.
The hash functions treat a slot whose key is 0 as empty.
*/
typedef struct
{
/** Hash key, or 0 if the slot is unused */
void *key;
/** Value stored together with the key */
void *data;
}
hash_struct_t;
/**
Data structure for the hash table implementation. A hash table allows for
retrieval and removal of any element in O(1), so long as a proper
hash function is supplied.
The hash table is implemented using a single hash function and
element storage directly in the array. When a collision occurs, the
hashtable iterates until a zero element is found. When adding an
element would make the table more than 75% full, it will
automatically reallocate itself. This reallocation takes O(n) time.
Once storage has been allocated, its size is always a Mersenne
number (of the form 2^n-1).
NOTE(review): this comment used to promise that the table is never
less than 30% full; the shrink test in hash_remove uses a 20%
threshold and an extra limit on the element count - confirm the
intended lower bound.
*/
typedef struct hash_table
{
/** The array containing the data */
hash_struct_t *arr;
/** A simple one item cache. This should always point to the index of the last item to be used, or be -1 if there is none */
int cache;
/** Number of elements */
int count;
/** Length of array */
int size;
/** Hash function: maps a key to an int hash value */
int (*hash_func)( void *key );
/** Comparison function: returns non-zero if the two keys are equal */
int (*compare_func)( void *key1, void *key2 );
}
hash_table_t;
/** /**
Data structure for an automatically resizing dynamically allocated Data structure for an automatically resizing dynamically allocated
priority queue. A priority queue allows quick retrieval of the priority queue. A priority queue allows quick retrieval of the
@ -193,128 +149,6 @@ int mini( int a, int b );
to never be less than 50% full. to never be less than 50% full.
*/ */
/**
Initialize a hash table. No storage is allocated by this call; the
table starts out with an array length of 0.
NOTE(review): this comment used to state that the hash function must
never return the value 0. The implementation treats a key pointer of
0 as an empty slot - confirm which restriction is intended.
\param h the table to initialize
\param hash_func function computing the hash value of a key
\param compare_func function returning non-zero if the two keys are equal
*/
void hash_init( hash_table_t *h,
int (*hash_func)( void *key),
int (*compare_func)( void *key1, void *key2 ) );
/**
Initialize a hash table and allocate its array up front, sized from
the requested capacity.
NOTE(review): this comment used to state that the hash function must
never return the value 0. The implementation treats a key pointer of
0 as an empty slot - confirm which restriction is intended.
\param h the table to initialize
\param hash_func function computing the hash value of a key
\param compare_func function returning non-zero if the two keys are equal
\param capacity the number of elements the caller expects to store
*/
void hash_init2( hash_table_t *h,
int (*hash_func)( void *key ),
int (*compare_func)( void *key1, void *key2 ),
size_t capacity);
/**
Destroy the hash table and free associated memory. Only the table's
own array is freed; the stored key and data pointers are not.
\param h the table to destroy
*/
void hash_destroy( hash_table_t *h );
/**
Set the key/value pair for the hashtable. If the key is already
present, its stored key and data pointers are replaced. Only the
pointers are stored; nothing is copied.
\param h the table
\param key the key
\param data the value to associate with the key
\return 1 on success, 0 if the table had to grow and the reallocation failed
*/
int hash_put( hash_table_t *h,
const void *key,
const void *data );
/**
Returns the data with the associated key, or 0 if no such key is in the hashtable
\param h the table
\param key the key to look up
*/
void *hash_get( hash_table_t *h,
const void *key );
/**
Returns the hash table's version of the specified key, i.e. the key
pointer stored in the table that compares equal to it, or 0 if no
such key is in the hashtable
\param h the table
\param key the key to look up
*/
void *hash_get_key( hash_table_t *h,
const void *key );
/**
Returns the number of key/data pairs in the table.
\param h the table
*/
int hash_get_count( hash_table_t *h);
/**
Remove the specified key from the hash table if it exists. If it does
not exist, the table is left unchanged and the output pointers, when
given, are set to 0.
\param h The hashtable
\param key The key
\param old_key If not 0, a pointer to the old key will be stored at the specified address
\param old_data If not 0, a pointer to the data will be stored at the specified address
*/
void hash_remove( hash_table_t *h,
const void *key,
void **old_key,
void **old_data );
/**
Checks whether the specified key is in the hash table
\param h the table
\param key the key to look for
\return non-zero if the key is in the table, 0 otherwise
*/
int hash_contains( hash_table_t *h,
const void *key );
/**
Appends all keys in the table to the specified list. The key pointers
themselves are pushed; nothing is copied.
\param h the table
\param arr the list to append to
*/
void hash_get_keys( hash_table_t *h,
array_list_t *arr );
/**
Appends all data elements in the table to the specified list. The
data pointers themselves are pushed; nothing is copied.
\param h the table
\param arr the list to append to
*/
void hash_get_data( hash_table_t *h,
array_list_t *arr );
/**
Call the function func for each key/data pair in the table. Entries
are visited in the order they are stored in the table's array.
\param h the table
\param func callback that receives the key and the data of each entry
*/
void hash_foreach( hash_table_t *h,
void (*func)( void *, void * ) );
/**
Same as hash_foreach, but the function func takes an additional
argument, which is provided by the caller in the variable aux
\param h the table
\param func callback that receives the key, the data and aux for each entry
\param aux value passed unchanged as the third argument of every call to func
*/
void hash_foreach2( hash_table_t *h, void (*func)( void *,
void *,
void *),
void *aux );
/**
Hash function suitable for character strings.
\param data the zero-terminated character string to hash
\return the hash value of the string
*/
int hash_str_func( void *data );
/**
Hash comparison function suitable for character strings
\param a first zero-terminated character string
\param b second zero-terminated character string
\return non-zero if the strings are equal according to strcmp, 0 otherwise
*/
int hash_str_cmp( void *a,
void *b );
/**
Hash function suitable for wide character strings. Uses a version
of the sha cryptographic function which is simplified in order to
return a 32-bit number.
\param data the zero-terminated wide character string to hash, or 0
\return the hash value of the string; 0 if data is 0
*/
int hash_wcs_func( void *data );
/**
Hash comparison function suitable for wide character strings
\param a first zero-terminated wide character string
\param b second zero-terminated wide character string
\return non-zero if the strings are equal according to wcscmp, 0 otherwise
*/
int hash_wcs_cmp( void *a,
void *b );
/**
Hash function suitable for direct pointer comparison. The hash is the
pointer value itself, converted to an int.
\param data the pointer to hash
*/
int hash_ptr_func( void *data );
/**
Hash comparison function suitable for direct pointer comparison
\param a first pointer
\param b second pointer
\return non-zero if a and b are the same pointer, 0 otherwise
*/
int hash_ptr_cmp( void *a,
void *b );
/** /**
Append element to list Append element to list