#include<iostream.h> #include<string> #include<fstream.h> #define DEBUG 0 const int HASH_BASE = 1009; const int SHIFT = 128; int hash_function( string s ); int hash_function_mult( string s ); long pow(int, int); void print_histogram( int*, int ); void main(){ string word; ifstream infile; infile.open("word_list.txt"); if( !infile ) cout << "File open failed\n"; else cout << "Word file opened\n\n"; // if we were doing a full-scale implementation of a hashtable to store // words, each entry of hash-array would be either a single word or // a list of words ("bucket") whose indices (computed by the hash_function) // collide there. int hash_array[ HASH_BASE ]; for(int i=0; i<HASH_BASE; i++) hash_array[i]=0; while( infile >> word ) { // if we were doing a full-scale implementation of a hashtable, we would // store word in the hash at the address hash_function( word ) . // For now, we just count the number of collisions at that address hash_array[ hash_function( word ) ] += 1; } if( DEBUG ) for(int i=0; i<HASH_BASE; i++) cout << hash_array[i] << ' '; cout << "\n"; print_histogram( hash_array, HASH_BASE ); } int hash_function( string s ){ unsigned int h = 0; for( int i=0; i<s.length(); i++) h += ((unsigned int) s[i]) * pow( SHIFT, i%4); // cout << h << " --> " << h%HASH_BASE << endl; return h%HASH_BASE; } int hash_function_mult( string s ){ unsigned int h = 0; for( int i=0; i<s.length(); i++) h += ((unsigned int) s[i]) * pow(256, i%4); double aux = h * 0.6180339887; double aux1 = aux - (unsigned int)( aux ); if ( aux1 > 1 ) cout << "ERROR: " << aux << " " << int (aux) << "\n"; return int(HASH_BASE*aux1); } long pow(int x, int n){ long r=1; for(int i=0; i<n; i++) r *= x; return r; } // A is filled with numbers from the range 0..max_entry void print_histogram( int* A, int size ){ // find the max entry int max_entry = 0; for( int i=0; i<size; i++) if( A[i] > max_entry ) max_entry = A[i]; int *hist = new int[max_entry+1]; for(int i=0; i<=max_entry; i++) hist[i]=0; for( int i=0; i<size; i++) hist[ A[i] ] +=1; cout << "Max entry: " << max_entry << "\n"; for( int i=0; i<=max_entry; i++) cout << i << " : " << hist[i] << "\n"; cout << "\n"; }