diff lshlib.cpp @ 523:83e37b76b483 multiprobeLSH

Insert statistics gathering for hash tables; add the _LSH_DEBUG_ macro to output LSH statistics on INDEX and QUERY.
author mas01mc
date Wed, 28 Jan 2009 05:18:14 +0000
parents dad3d252462a
children 469b50a3dd84
line wrap: on
line diff
--- a/lshlib.cpp	Tue Jan 27 14:52:28 2009 +0000
+++ b/lshlib.cpp	Wed Jan 28 05:18:14 2009 +0000
@@ -1010,6 +1010,7 @@
     minColCount=O2_SERIAL_MAX_COLS;
     meanColCount=0;
     colCountN=0;
+    H::tablesPointCount = 0;
     for( y = 0 ;  y < H::N ; y++ ){
       colCount=0;
       if(bucket* bPtr = h[x][y]){
@@ -1047,12 +1048,13 @@
 	meanColCount+=colCount;
 	colCountN++;
       }
+      H::tablesPointCount+=colCount;
     }
     // Write END of table marker
     t1 = O2_SERIAL_TOKEN_ENDTABLE;
     WRITE_UNS32(&t1,"[end]");
     if(colCountN)
-      std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN 
+      std::cout << "#points: " << H::tablesPointCount  << " #rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN 
 		<< ", min = " << minColCount << ", max = " << maxColCount 
 		<< endl;
   }  
@@ -1343,7 +1345,12 @@
  
 void G::unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge){
   Uns32T x=0,y=0;
-
+#ifdef _LSH_DEBUG_
+  cout << "Loading hashtables..." << endl;
+  cout << "header pointCount = " << pointCount << endl;
+  cout << "forMerge = " << forMerge << endl;
+  Uns32T sumTablesPointCount = 0;
+#endif
   // Seek to hashtable base offset
   if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){
     fclose(dbFile);
@@ -1352,6 +1359,7 @@
 
   // Read the hash tables into core (structure is given in header) 
   while( x < H::L){
+    tablesPointCount=0;
     if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){
       fclose(dbFile);
       error("Read error","unserialize_lsh_hashtables_format2()");
@@ -1395,7 +1403,7 @@
 	  // Use ARRAY CORE format with numElements counter
 	  token = unserialize_hashtable_row_to_array(dbFile, h[x]+y, numElements);
 #else
-	token = unserialize_hashtable_row_format2(dbFile, h[x]+y);	
+	token = unserialize_hashtable_row_format2(dbFile, h[x]+y);
 #endif	
 	// Check that token is valid
 	if( !(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE) ){
@@ -1411,7 +1419,14 @@
 	if(token==O2_SERIAL_TOKEN_T1)
 	  H::t1 = token;
       }
+#ifdef _LSH_DEBUG_
+    cout << "table " << x << " pointCount = " << tablesPointCount << endl;
+    sumTablesPointCount+=tablesPointCount;
+#endif    
   }
+#ifdef _LSH_DEBUG_
+  cout << "TOTAL pointCount = " << sumTablesPointCount << endl;
+#endif    
 #ifdef LSH_DUMP_CORE_TABLES
   dump_hashtables();
 #endif
@@ -1451,6 +1466,7 @@
     while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){
       pointFound=true;
       bucket_insert_point(b);
+      tablesPointCount++;
       if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){
 	fclose(dbFile);
 	error("Read error H::p","unserialize_hashtable_row_format2");
@@ -1570,6 +1586,7 @@
   // Allocate a new dynamic list head at the end of the array
   bucket** listPtr = reinterpret_cast<bucket**> (ap);
   *listPtr = 0;
+  H::tablesPointCount += numPoints;
   // Return current token
   return H::t2; // return H::t2 which holds current token [E or T1]
 }