Mercurial > hg > audiodb
changeset 523:83e37b76b483 multiprobeLSH
Insert statistics gathering for hash tables; add the _LSH_DEBUG_ macro to output LSH statistics on INDEX and QUERY.
author | mas01mc |
---|---|
date | Wed, 28 Jan 2009 05:18:14 +0000 |
parents | dad3d252462a |
children | 469b50a3dd84 |
files | lshlib.cpp lshlib.h |
diffstat | 2 files changed, 26 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/lshlib.cpp Tue Jan 27 14:52:28 2009 +0000 +++ b/lshlib.cpp Wed Jan 28 05:18:14 2009 +0000 @@ -1010,6 +1010,7 @@ minColCount=O2_SERIAL_MAX_COLS; meanColCount=0; colCountN=0; + H::tablesPointCount = 0; for( y = 0 ; y < H::N ; y++ ){ colCount=0; if(bucket* bPtr = h[x][y]){ @@ -1047,12 +1048,13 @@ meanColCount+=colCount; colCountN++; } + H::tablesPointCount+=colCount; } // Write END of table marker t1 = O2_SERIAL_TOKEN_ENDTABLE; WRITE_UNS32(&t1,"[end]"); if(colCountN) - std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN + std::cout << "#points: " << H::tablesPointCount << " #rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN << ", min = " << minColCount << ", max = " << maxColCount << endl; } @@ -1343,7 +1345,12 @@ void G::unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge){ Uns32T x=0,y=0; - +#ifdef _LSH_DEBUG_ + cout << "Loading hashtables..." << endl; + cout << "header pointCount = " << pointCount << endl; + cout << "forMerge = " << forMerge << endl; + Uns32T sumTablesPointCount = 0; +#endif // Seek to hashtable base offset if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){ fclose(dbFile); @@ -1352,6 +1359,7 @@ // Read the hash tables into core (structure is given in header) while( x < H::L){ + tablesPointCount=0; if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){ fclose(dbFile); error("Read error","unserialize_lsh_hashtables_format2()"); @@ -1395,7 +1403,7 @@ // Use ARRAY CORE format with numElements counter token = unserialize_hashtable_row_to_array(dbFile, h[x]+y, numElements); #else - token = unserialize_hashtable_row_format2(dbFile, h[x]+y); + token = unserialize_hashtable_row_format2(dbFile, h[x]+y); #endif // Check that token is valid if( !(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE) ){ @@ -1411,7 +1419,14 @@ if(token==O2_SERIAL_TOKEN_T1) H::t1 = token; } +#ifdef _LSH_DEBUG_ + cout << "table " << x << " pointCount = " << 
tablesPointCount << endl; + sumTablesPointCount+=tablesPointCount; +#endif } +#ifdef _LSH_DEBUG_ + cout << "TOTAL pointCount = " << sumTablesPointCount << endl; +#endif #ifdef LSH_DUMP_CORE_TABLES dump_hashtables(); #endif @@ -1451,6 +1466,7 @@ while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){ pointFound=true; bucket_insert_point(b); + tablesPointCount++; if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){ fclose(dbFile); error("Read error H::p","unserialize_hashtable_row_format2"); @@ -1570,6 +1586,7 @@ // Allocate a new dynamic list head at the end of the array bucket** listPtr = reinterpret_cast<bucket**> (ap); *listPtr = 0; + H::tablesPointCount += numPoints; // Return current token return H::t2; // return H::t2 which holds current token [E or T1] }
--- a/lshlib.h Tue Jan 27 14:52:28 2009 +0000 +++ b/lshlib.h Wed Jan 28 05:18:14 2009 +0000 @@ -94,9 +94,13 @@ #define WRITE_UNS32(VAL, TOKENSTR) if( fwrite(VAL, sizeof(Uns32T), 1, dbFile) != 1 ){\ fclose(dbFile);error("write error in serial_write_format2",TOKENSTR);} -//#define LSH_DUMP_CORE_TABLES // set to dump hashtables on load +#define LSH_DUMP_CORE_TABLES // set to dump hashtables on load +#define _LSH_DEBUG_ // turn on debugging information + //#define USE_U_FUNCTIONS // set to use partial hashfunction re-use + + // Backward-compatible CORE ARRAY lsh index #define LSH_CORE_ARRAY // Set to use arrays for hashtables rather than linked-lists #define LSH_LIST_HEAD_COUNTERS // Enable counters in hashtable list heads @@ -232,6 +236,7 @@ Uns32T bucketCount; // count of number of point buckets allocated Uns32T pointCount; // count of number of points inserted Uns32T collisionCount; // number of points collided in a hash-table row + Uns32T tablesPointCount; // count of points per hash table on load Uns32T t1; // first hash table key Uns32T t2; // second hash table key