Mercurial > hg > audiodb
changeset 525:11dd6eab15c8 multiprobeLSH
The _LSH_DEBUG_ option now reports the correct point count for each hashtable read.
author | mas01mc |
---|---|
date | Wed, 28 Jan 2009 17:18:58 +0000 |
parents | 469b50a3dd84 |
children | cbd5841e6b70 |
files | Makefile lshlib.cpp lshlib.h |
diffstat | 3 files changed, 34 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile Wed Jan 28 16:02:17 2009 +0000 +++ b/Makefile Wed Jan 28 17:18:58 2009 +0000 @@ -17,14 +17,19 @@ MINORVERSION=0 LIBRARY=lib$(EXECUTABLE).so.$(SOVERSION).$(MINORVERSION) -override CFLAGS+=-ggdb -g -fPIC +override CFLAGS+=-O3 -g -fPIC # set to DUMP hashtables on QUERY load #override CFLAGS+=-DLSH_DUMP_CORE_TABLES +# set to turn on debugging information for LSH hashtables +#override CFLAGS+=-D_LSH_DEBUG_ + # set to increase multiple probes in LSH QUERY (allowable range = 1 ... lsh_k*2) #override CFLAGS+=-DLSH_MULTI_PROBE_COUNT=10 + + ifeq ($(shell uname),Linux) override CFLAGS+=-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 endif
--- a/lshlib.cpp Wed Jan 28 16:02:17 2009 +0000 +++ b/lshlib.cpp Wed Jan 28 17:18:58 2009 +0000 @@ -1416,10 +1416,12 @@ fclose(dbFile); error("Read error: numElements","unserialize_lsh_hashtables_format2()"); } - + + /* #ifdef _LSH_DEBUG_ cout << "[" << x << "," << y << "] numElements(disk) = " << numElements; #endif + */ // BACKWARD COMPATIBILITY: check to see if T2 or END token was read if(numElements==O2_SERIAL_TOKEN_T2 || numElements==O2_SERIAL_TOKEN_ENDTABLE ){ forMerge=true; // Force use of dynamic linked list core format @@ -1450,7 +1452,7 @@ H::t1 = token; } #ifdef _LSH_DEBUG_ - cout << " pointCount = " << tablesPointCount << endl; + cout << "[T " << x-1 << "] pointCount = " << tablesPointCount << endl; sumTablesPointCount+=tablesPointCount; #endif } @@ -1549,6 +1551,7 @@ secondPtr=ap++;\ *secondPtr=0;\ numPoints++;\ + numSingletons++;\ }\ if(numPointsThisBucket>1){\ *firstPtr |= ( (numPointsThisBucket-1) & 0x3 ) << SKIP_BITS_LEFT_SHIFT_MSB;\ @@ -1560,13 +1563,11 @@ Uns32T numPoints = 0; Uns32T* firstPtr = 0; Uns32T* secondPtr = 0; + Uns32T numSingletons = 0; // Count single point puckets because we encode them with 2 points (for skip) // Initialize new row if(!*rowPP){ *rowPP = new bucket(); -#ifdef _LSH_DEBUG_ - cout << " () "; -#endif #ifdef LSH_LIST_HEAD_COUNTERS (*rowPP)->t2 = 0; // Use t2 as a collision counter for the row (*rowPP)->next = 0; @@ -1589,12 +1590,22 @@ cout << "last_t2=" << last_t2 << ", t2=" << H::t2 << endl; TEST_TOKEN(H::t2<last_t2, "t2 tokens not in ascending order"); last_t2 = H::t2; + /* +#ifdef _LSH_DEBUG_ + cout << "+" << H::t2 << "+"; +#endif + */ *ap++ = H::t2; // Insert t2 value into array numBuckets++; READ_UNS32T(&(H::p), "Read error H::p"); while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){ if(numPointsThisBucket==MAX_POINTS_IN_BUCKET_CORE_ARRAY){ ENCODE_POINT_SKIP_BITS; + /* +#ifdef _LSH_DEBUG_ + cout << "*" << H::t2 << "*"; +#endif + */ *ap++ = H::t2; // Extra element 
numBuckets++; // record this as a new bucket numPointsThisBucket=0; // reset bucket point counter @@ -1605,12 +1616,18 @@ else if( numPointsThisBucket == 2 ) secondPtr = ap; // store pointer to first point to insert skip bits later on numPoints++; + /* +#ifdef _LSH_DEBUG_ + cout << "(" << H::p << ":" << numPoints << ")"; +#endif + */ + *ap++ = H::p; READ_UNS32T(&(H::p), "Read error H::p"); } ENCODE_POINT_SKIP_BITS; H::t2 = H::p; // Copy last found token to t2 - } + } // Reallocate the row to its actual size CR_ASSERT(rowPtr->next = (bucket*) realloc(rowPtr->next, (numBuckets+numPoints+1)*sizeof(Uns32T)+sizeof(bucket**))); // Record the sizes at the head of the row @@ -1623,10 +1640,12 @@ // Allocate a new dynamic list head at the end of the array bucket** listPtr = reinterpret_cast<bucket**> (ap); *listPtr = 0; + /* #ifdef _LSH_DEBUG_ - cout << " numBuckets=" << numBuckets << " numPoints=" << numPoints << " numElements(array) " << numBuckets+numPoints << " " << endl; + cout << " numBuckets=" << numBuckets << " numPoints=" << numPoints - numSingletons << " numElements(array) " << numBuckets+numPoints - numSingletons << " " << endl; #endif - H::tablesPointCount += numPoints; + */ + H::tablesPointCount += numPoints - numSingletons; // Return current token return H::t2; // return H::t2 which holds current token [E or T1] }
--- a/lshlib.h Wed Jan 28 16:02:17 2009 +0000 +++ b/lshlib.h Wed Jan 28 17:18:58 2009 +0000 @@ -95,7 +95,7 @@ fclose(dbFile);error("write error in serial_write_format2",TOKENSTR);} //#define LSH_DUMP_CORE_TABLES // set to dump hashtables on load -#define _LSH_DEBUG_ // turn on debugging information +//#define _LSH_DEBUG_ // turn on debugging information //#define USE_U_FUNCTIONS // set to use partial hashfunction re-use