Mercurial > hg > audiodb
diff lshlib.cpp @ 523:83e37b76b483 multiprobeLSH
Insert some statistics gathering for hash tables; add the _LSH_DEBUG_ macro to output LSH statistics on INDEX and QUERY.
author | mas01mc |
---|---|
date | Wed, 28 Jan 2009 05:18:14 +0000 |
parents | dad3d252462a |
children | 469b50a3dd84 |
line wrap: on
line diff
--- a/lshlib.cpp Tue Jan 27 14:52:28 2009 +0000 +++ b/lshlib.cpp Wed Jan 28 05:18:14 2009 +0000 @@ -1010,6 +1010,7 @@ minColCount=O2_SERIAL_MAX_COLS; meanColCount=0; colCountN=0; + H::tablesPointCount = 0; for( y = 0 ; y < H::N ; y++ ){ colCount=0; if(bucket* bPtr = h[x][y]){ @@ -1047,12 +1048,13 @@ meanColCount+=colCount; colCountN++; } + H::tablesPointCount+=colCount; } // Write END of table marker t1 = O2_SERIAL_TOKEN_ENDTABLE; WRITE_UNS32(&t1,"[end]"); if(colCountN) - std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN + std::cout << "#points: " << H::tablesPointCount << " #rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN << ", min = " << minColCount << ", max = " << maxColCount << endl; } @@ -1343,7 +1345,12 @@ void G::unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge){ Uns32T x=0,y=0; - +#ifdef _LSH_DEBUG_ + cout << "Loading hashtables..." << endl; + cout << "header pointCount = " << pointCount << endl; + cout << "forMerge = " << forMerge << endl; + Uns32T sumTablesPointCount = 0; +#endif // Seek to hashtable base offset if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){ fclose(dbFile); @@ -1352,6 +1359,7 @@ // Read the hash tables into core (structure is given in header) while( x < H::L){ + tablesPointCount=0; if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){ fclose(dbFile); error("Read error","unserialize_lsh_hashtables_format2()"); @@ -1395,7 +1403,7 @@ // Use ARRAY CORE format with numElements counter token = unserialize_hashtable_row_to_array(dbFile, h[x]+y, numElements); #else - token = unserialize_hashtable_row_format2(dbFile, h[x]+y); + token = unserialize_hashtable_row_format2(dbFile, h[x]+y); #endif // Check that token is valid if( !(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE) ){ @@ -1411,7 +1419,14 @@ if(token==O2_SERIAL_TOKEN_T1) H::t1 = token; } +#ifdef _LSH_DEBUG_ + cout << "table " << x << " pointCount = " << 
tablesPointCount << endl; + sumTablesPointCount+=tablesPointCount; +#endif } +#ifdef _LSH_DEBUG_ + cout << "TOTAL pointCount = " << sumTablesPointCount << endl; +#endif #ifdef LSH_DUMP_CORE_TABLES dump_hashtables(); #endif @@ -1451,6 +1466,7 @@ while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){ pointFound=true; bucket_insert_point(b); + tablesPointCount++; if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){ fclose(dbFile); error("Read error H::p","unserialize_hashtable_row_format2"); @@ -1570,6 +1586,7 @@ // Allocate a new dynamic list head at the end of the array bucket** listPtr = reinterpret_cast<bucket**> (ap); *listPtr = 0; + H::tablesPointCount += numPoints; // Return current token return H::t2; // return H::t2 which holds current token [E or T1] }