changeset 525:11dd6eab15c8 multiprobeLSH

_LSH_DEBUG_ option now reports correct point counts per hashtable read.
author mas01mc
date Wed, 28 Jan 2009 17:18:58 +0000
parents 469b50a3dd84
children cbd5841e6b70
files Makefile lshlib.cpp lshlib.h
diffstat 3 files changed, 34 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Wed Jan 28 16:02:17 2009 +0000
+++ b/Makefile	Wed Jan 28 17:18:58 2009 +0000
@@ -17,14 +17,19 @@
 MINORVERSION=0
 LIBRARY=lib$(EXECUTABLE).so.$(SOVERSION).$(MINORVERSION)
 
-override CFLAGS+=-ggdb -g -fPIC
+override CFLAGS+=-O3 -g -fPIC
 
 # set to DUMP hashtables on QUERY load
 #override CFLAGS+=-DLSH_DUMP_CORE_TABLES
 
+# set to turn on debugging information for LSH hashtables
+#override CFLAGS+=-D_LSH_DEBUG_
+
 # set to increase multiple probes in LSH QUERY (allowable range = 1 ... lsh_k*2)
 #override CFLAGS+=-DLSH_MULTI_PROBE_COUNT=10
 
+
+
 ifeq ($(shell uname),Linux)
 override CFLAGS+=-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
 endif
--- a/lshlib.cpp	Wed Jan 28 16:02:17 2009 +0000
+++ b/lshlib.cpp	Wed Jan 28 17:18:58 2009 +0000
@@ -1416,10 +1416,12 @@
 	  fclose(dbFile);
 	  error("Read error: numElements","unserialize_lsh_hashtables_format2()");
 	}
-	
+
+	/*	
 #ifdef _LSH_DEBUG_
 	cout << "[" << x << "," << y << "] numElements(disk) = " << numElements;
 #endif    	
+	*/
 	// BACKWARD COMPATIBILITY: check to see if T2 or END token was read
 	if(numElements==O2_SERIAL_TOKEN_T2 || numElements==O2_SERIAL_TOKEN_ENDTABLE ){
 	  forMerge=true; // Force use of dynamic linked list core format
@@ -1450,7 +1452,7 @@
 	  H::t1 = token;
       }
 #ifdef _LSH_DEBUG_
-    cout << " pointCount = " << tablesPointCount << endl;
+    cout << "[T " << x-1 << "] pointCount = " << tablesPointCount << endl;
     sumTablesPointCount+=tablesPointCount;
 #endif    
   }
@@ -1549,6 +1551,7 @@
              secondPtr=ap++;\
              *secondPtr=0;\
              numPoints++;\
+	     numSingletons++;\
     }\
     if(numPointsThisBucket>1){\
       *firstPtr |=  ( (numPointsThisBucket-1) & 0x3 ) << SKIP_BITS_LEFT_SHIFT_MSB;\
@@ -1560,13 +1563,11 @@
   Uns32T numPoints = 0;
   Uns32T* firstPtr = 0;
   Uns32T* secondPtr = 0;
+  Uns32T numSingletons = 0; // Count single-point buckets because we encode them with 2 points (for skip)
 
   // Initialize new row
   if(!*rowPP){
     *rowPP = new bucket();
-#ifdef _LSH_DEBUG_
-    cout << " () ";
-#endif
 #ifdef LSH_LIST_HEAD_COUNTERS
     (*rowPP)->t2 = 0; // Use t2 as a collision counter for the row
     (*rowPP)->next = 0;
@@ -1589,12 +1590,22 @@
       cout << "last_t2=" << last_t2 << ", t2=" << H::t2 << endl;
     TEST_TOKEN(H::t2<last_t2, "t2 tokens not in ascending order");
     last_t2 = H::t2;
+    /*
+#ifdef _LSH_DEBUG_
+      cout << "+" << H::t2 << "+";
+#endif
+    */
     *ap++ = H::t2; // Insert t2 value into array
     numBuckets++;
     READ_UNS32T(&(H::p), "Read error H::p"); 
     while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){      
       if(numPointsThisBucket==MAX_POINTS_IN_BUCKET_CORE_ARRAY){
 	ENCODE_POINT_SKIP_BITS;
+	/*
+#ifdef _LSH_DEBUG_
+      cout << "*" << H::t2 << "*";
+#endif
+	*/
 	*ap++ = H::t2; // Extra element
 	numBuckets++;  // record this as a new bucket
 	numPointsThisBucket=0; // reset bucket point counter
@@ -1605,12 +1616,18 @@
       else if( numPointsThisBucket == 2 )
 	secondPtr = ap;  // store pointer to first point to insert skip bits later on
       numPoints++;
+      /*
+#ifdef _LSH_DEBUG_
+      cout << "(" << H::p << ":" << numPoints << ")";
+#endif
+      */
+
       *ap++ = H::p;
       READ_UNS32T(&(H::p), "Read error H::p"); 
     }
     ENCODE_POINT_SKIP_BITS;    
     H::t2 = H::p; // Copy last found token to t2
-  }    
+  }
   // Reallocate the row to its actual size
   CR_ASSERT(rowPtr->next = (bucket*) realloc(rowPtr->next, (numBuckets+numPoints+1)*sizeof(Uns32T)+sizeof(bucket**)));
   // Record the sizes at the head of the row
@@ -1623,10 +1640,12 @@
   // Allocate a new dynamic list head at the end of the array
   bucket** listPtr = reinterpret_cast<bucket**> (ap);
   *listPtr = 0;
+  /*
 #ifdef _LSH_DEBUG_
-  cout << " numBuckets=" << numBuckets << " numPoints=" << numPoints << " numElements(array) " << numBuckets+numPoints << " " << endl;
+  cout << " numBuckets=" << numBuckets << " numPoints=" << numPoints - numSingletons << " numElements(array) " << numBuckets+numPoints - numSingletons << " " << endl;
 #endif
-  H::tablesPointCount += numPoints;
+  */
+  H::tablesPointCount += numPoints - numSingletons;
   // Return current token
   return H::t2; // return H::t2 which holds current token [E or T1]
 }
--- a/lshlib.h	Wed Jan 28 16:02:17 2009 +0000
+++ b/lshlib.h	Wed Jan 28 17:18:58 2009 +0000
@@ -95,7 +95,7 @@
   fclose(dbFile);error("write error in serial_write_format2",TOKENSTR);}	
 
 //#define LSH_DUMP_CORE_TABLES  // set to dump hashtables on load
-#define _LSH_DEBUG_             // turn on debugging information
+//#define _LSH_DEBUG_             // turn on debugging information
 
 //#define USE_U_FUNCTIONS       // set to use partial hashfunction re-use