changeset 523:83e37b76b483 multiprobeLSH

Insert statistics gathering for hash tables; add the _LSH_DEBUG_ macro to output LSH statistics on INDEX and QUERY.
author mas01mc
date Wed, 28 Jan 2009 05:18:14 +0000
parents dad3d252462a
children 469b50a3dd84
files lshlib.cpp lshlib.h
diffstat 2 files changed, 26 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/lshlib.cpp	Tue Jan 27 14:52:28 2009 +0000
+++ b/lshlib.cpp	Wed Jan 28 05:18:14 2009 +0000
@@ -1010,6 +1010,7 @@
     minColCount=O2_SERIAL_MAX_COLS;
     meanColCount=0;
     colCountN=0;
+    H::tablesPointCount = 0;
     for( y = 0 ;  y < H::N ; y++ ){
       colCount=0;
       if(bucket* bPtr = h[x][y]){
@@ -1047,12 +1048,13 @@
 	meanColCount+=colCount;
 	colCountN++;
       }
+      H::tablesPointCount+=colCount;
     }
     // Write END of table marker
     t1 = O2_SERIAL_TOKEN_ENDTABLE;
     WRITE_UNS32(&t1,"[end]");
     if(colCountN)
-      std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN 
+      std::cout << "#points: " << H::tablesPointCount  << " #rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN 
 		<< ", min = " << minColCount << ", max = " << maxColCount 
 		<< endl;
   }  
@@ -1343,7 +1345,12 @@
  
 void G::unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge){
   Uns32T x=0,y=0;
-
+#ifdef _LSH_DEBUG_
+  cout << "Loading hashtables..." << endl;
+  cout << "header pointCount = " << pointCount << endl;
+  cout << "forMerge = " << forMerge << endl;
+  Uns32T sumTablesPointCount = 0;
+#endif
   // Seek to hashtable base offset
   if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){
     fclose(dbFile);
@@ -1352,6 +1359,7 @@
 
   // Read the hash tables into core (structure is given in header) 
   while( x < H::L){
+    tablesPointCount=0;
     if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){
       fclose(dbFile);
       error("Read error","unserialize_lsh_hashtables_format2()");
@@ -1395,7 +1403,7 @@
 	  // Use ARRAY CORE format with numElements counter
 	  token = unserialize_hashtable_row_to_array(dbFile, h[x]+y, numElements);
 #else
-	token = unserialize_hashtable_row_format2(dbFile, h[x]+y);	
+	token = unserialize_hashtable_row_format2(dbFile, h[x]+y);
 #endif	
 	// Check that token is valid
 	if( !(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE) ){
@@ -1411,7 +1419,14 @@
 	if(token==O2_SERIAL_TOKEN_T1)
 	  H::t1 = token;
       }
+#ifdef _LSH_DEBUG_
+    cout << "table " << x << " pointCount = " << tablesPointCount << endl;
+    sumTablesPointCount+=tablesPointCount;
+#endif    
   }
+#ifdef _LSH_DEBUG_
+  cout << "TOTAL pointCount = " << sumTablesPointCount << endl;
+#endif    
 #ifdef LSH_DUMP_CORE_TABLES
   dump_hashtables();
 #endif
@@ -1451,6 +1466,7 @@
     while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){
       pointFound=true;
       bucket_insert_point(b);
+      tablesPointCount++;
       if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){
 	fclose(dbFile);
 	error("Read error H::p","unserialize_hashtable_row_format2");
@@ -1570,6 +1586,7 @@
   // Allocate a new dynamic list head at the end of the array
   bucket** listPtr = reinterpret_cast<bucket**> (ap);
   *listPtr = 0;
+  H::tablesPointCount += numPoints;
   // Return current token
   return H::t2; // return H::t2 which holds current token [E or T1]
 }
--- a/lshlib.h	Tue Jan 27 14:52:28 2009 +0000
+++ b/lshlib.h	Wed Jan 28 05:18:14 2009 +0000
@@ -94,9 +94,13 @@
 #define WRITE_UNS32(VAL, TOKENSTR) if( fwrite(VAL, sizeof(Uns32T), 1, dbFile) != 1 ){\
   fclose(dbFile);error("write error in serial_write_format2",TOKENSTR);}	
 
-//#define LSH_DUMP_CORE_TABLES  // set to dump hashtables on load
+#define LSH_DUMP_CORE_TABLES  // set to dump hashtables on load
+#define _LSH_DEBUG_             // turn on debugging information
+
 //#define USE_U_FUNCTIONS       // set to use partial hashfunction re-use
 
+
+
 // Backward-compatible CORE ARRAY lsh index
 #define LSH_CORE_ARRAY  // Set to use arrays for hashtables rather than linked-lists
 #define LSH_LIST_HEAD_COUNTERS // Enable counters in hashtable list heads
@@ -232,6 +236,7 @@
   Uns32T bucketCount;  // count of number of point buckets allocated
   Uns32T pointCount;    // count of number of points inserted
   Uns32T collisionCount; // number of points collided in a hash-table row
+  Uns32T tablesPointCount; // count of points per hash table on load
 
   Uns32T t1;       // first hash table key
   Uns32T t2;       // second hash table key