comparison lshlib.cpp @ 523:83e37b76b483 multiprobeLSH

Insert statistics gathering for the hash tables; add the _LSH_DEBUG_ macro to output LSH statistics on INDEX and QUERY.
author mas01mc
date Wed, 28 Jan 2009 05:18:14 +0000
parents dad3d252462a
children 469b50a3dd84
comparison
equal deleted inserted replaced
522:dad3d252462a 523:83e37b76b483
1008 std::cout.flush(); 1008 std::cout.flush();
1009 maxColCount=0; 1009 maxColCount=0;
1010 minColCount=O2_SERIAL_MAX_COLS; 1010 minColCount=O2_SERIAL_MAX_COLS;
1011 meanColCount=0; 1011 meanColCount=0;
1012 colCountN=0; 1012 colCountN=0;
1013 H::tablesPointCount = 0;
1013 for( y = 0 ; y < H::N ; y++ ){ 1014 for( y = 0 ; y < H::N ; y++ ){
1014 colCount=0; 1015 colCount=0;
1015 if(bucket* bPtr = h[x][y]){ 1016 if(bucket* bPtr = h[x][y]){
1016 // Check for empty row (even though row was allocated) 1017 // Check for empty row (even though row was allocated)
1017 #ifdef LSH_LIST_HEAD_COUNTERS 1018 #ifdef LSH_LIST_HEAD_COUNTERS
1045 if(colCount>maxColCount) 1046 if(colCount>maxColCount)
1046 maxColCount=colCount; 1047 maxColCount=colCount;
1047 meanColCount+=colCount; 1048 meanColCount+=colCount;
1048 colCountN++; 1049 colCountN++;
1049 } 1050 }
1051 H::tablesPointCount+=colCount;
1050 } 1052 }
1051 // Write END of table marker 1053 // Write END of table marker
1052 t1 = O2_SERIAL_TOKEN_ENDTABLE; 1054 t1 = O2_SERIAL_TOKEN_ENDTABLE;
1053 WRITE_UNS32(&t1,"[end]"); 1055 WRITE_UNS32(&t1,"[end]");
1054 if(colCountN) 1056 if(colCountN)
1055 std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN 1057 std::cout << "#points: " << H::tablesPointCount << " #rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN
1056 << ", min = " << minColCount << ", max = " << maxColCount 1058 << ", min = " << minColCount << ", max = " << maxColCount
1057 << endl; 1059 << endl;
1058 } 1060 }
1059 // We're done writing 1061 // We're done writing
1060 return 1; 1062 return 1;
1341 } 1343 }
1342 } 1344 }
1343 1345
1344 void G::unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge){ 1346 void G::unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge){
1345 Uns32T x=0,y=0; 1347 Uns32T x=0,y=0;
1346 1348 #ifdef _LSH_DEBUG_
1349 cout << "Loading hashtables..." << endl;
1350 cout << "header pointCount = " << pointCount << endl;
1351 cout << "forMerge = " << forMerge << endl;
1352 Uns32T sumTablesPointCount = 0;
1353 #endif
1347 // Seek to hashtable base offset 1354 // Seek to hashtable base offset
1348 if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){ 1355 if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){
1349 fclose(dbFile); 1356 fclose(dbFile);
1350 error("fSeek error in unserialize_lsh_hashtables_format2"); 1357 error("fSeek error in unserialize_lsh_hashtables_format2");
1351 } 1358 }
1352 1359
1353 // Read the hash tables into core (structure is given in header) 1360 // Read the hash tables into core (structure is given in header)
1354 while( x < H::L){ 1361 while( x < H::L){
1362 tablesPointCount=0;
1355 if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){ 1363 if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){
1356 fclose(dbFile); 1364 fclose(dbFile);
1357 error("Read error","unserialize_lsh_hashtables_format2()"); 1365 error("Read error","unserialize_lsh_hashtables_format2()");
1358 } 1366 }
1359 if(H::t1==O2_SERIAL_TOKEN_ENDTABLE) 1367 if(H::t1==O2_SERIAL_TOKEN_ENDTABLE)
1393 token = unserialize_hashtable_row_format2(dbFile, h[x]+y, token); 1401 token = unserialize_hashtable_row_format2(dbFile, h[x]+y, token);
1394 else 1402 else
1395 // Use ARRAY CORE format with numElements counter 1403 // Use ARRAY CORE format with numElements counter
1396 token = unserialize_hashtable_row_to_array(dbFile, h[x]+y, numElements); 1404 token = unserialize_hashtable_row_to_array(dbFile, h[x]+y, numElements);
1397 #else 1405 #else
1398 token = unserialize_hashtable_row_format2(dbFile, h[x]+y); 1406 token = unserialize_hashtable_row_format2(dbFile, h[x]+y);
1399 #endif 1407 #endif
1400 // Check that token is valid 1408 // Check that token is valid
1401 if( !(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE) ){ 1409 if( !(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE) ){
1402 fclose(dbFile); 1410 fclose(dbFile);
1403 error("State machine error end of row/table", "unserialize_lsh_hashtables_format2()"); 1411 error("State machine error end of row/table", "unserialize_lsh_hashtables_format2()");
1409 } 1417 }
1410 // Check for new row flag 1418 // Check for new row flag
1411 if(token==O2_SERIAL_TOKEN_T1) 1419 if(token==O2_SERIAL_TOKEN_T1)
1412 H::t1 = token; 1420 H::t1 = token;
1413 } 1421 }
1414 } 1422 #ifdef _LSH_DEBUG_
1423 cout << "table " << x << " pointCount = " << tablesPointCount << endl;
1424 sumTablesPointCount+=tablesPointCount;
1425 #endif
1426 }
1427 #ifdef _LSH_DEBUG_
1428 cout << "TOTAL pointCount = " << sumTablesPointCount << endl;
1429 #endif
1415 #ifdef LSH_DUMP_CORE_TABLES 1430 #ifdef LSH_DUMP_CORE_TABLES
1416 dump_hashtables(); 1431 dump_hashtables();
1417 #endif 1432 #endif
1418 } 1433 }
1419 1434
1449 error("Read error H::p","unserialize_hashtable_row_format2"); 1464 error("Read error H::p","unserialize_hashtable_row_format2");
1450 } 1465 }
1451 while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){ 1466 while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){
1452 pointFound=true; 1467 pointFound=true;
1453 bucket_insert_point(b); 1468 bucket_insert_point(b);
1469 tablesPointCount++;
1454 if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){ 1470 if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){
1455 fclose(dbFile); 1471 fclose(dbFile);
1456 error("Read error H::p","unserialize_hashtable_row_format2"); 1472 error("Read error H::p","unserialize_hashtable_row_format2");
1457 } 1473 }
1458 } 1474 }
1568 // Set the LSH_CORE_ARRAY_BIT to identify data structure for insertion and retrieval 1584 // Set the LSH_CORE_ARRAY_BIT to identify data structure for insertion and retrieval
1569 rowPtr->t2 |= LSH_CORE_ARRAY_BIT; 1585 rowPtr->t2 |= LSH_CORE_ARRAY_BIT;
1570 // Allocate a new dynamic list head at the end of the array 1586 // Allocate a new dynamic list head at the end of the array
1571 bucket** listPtr = reinterpret_cast<bucket**> (ap); 1587 bucket** listPtr = reinterpret_cast<bucket**> (ap);
1572 *listPtr = 0; 1588 *listPtr = 0;
1589 H::tablesPointCount += numPoints;
1573 // Return current token 1590 // Return current token
1574 return H::t2; // return H::t2 which holds current token [E or T1] 1591 return H::t2; // return H::t2 which holds current token [E or T1]
1575 } 1592 }
1576 1593
1577 1594