Mercurial > hg > audiodb
comparison lshlib.cpp @ 523:83e37b76b483 multiprobeLSH
Insert some statistics gathering for hash tables; add the _LSH_DEBUG_ macro to output LSH statistics on INDEX and QUERY.
author | mas01mc |
---|---|
date | Wed, 28 Jan 2009 05:18:14 +0000 |
parents | dad3d252462a |
children | 469b50a3dd84 |
comparison
equal
deleted
inserted
replaced
522:dad3d252462a | 523:83e37b76b483 |
---|---|
1008 std::cout.flush(); | 1008 std::cout.flush(); |
1009 maxColCount=0; | 1009 maxColCount=0; |
1010 minColCount=O2_SERIAL_MAX_COLS; | 1010 minColCount=O2_SERIAL_MAX_COLS; |
1011 meanColCount=0; | 1011 meanColCount=0; |
1012 colCountN=0; | 1012 colCountN=0; |
1013 H::tablesPointCount = 0; | |
1013 for( y = 0 ; y < H::N ; y++ ){ | 1014 for( y = 0 ; y < H::N ; y++ ){ |
1014 colCount=0; | 1015 colCount=0; |
1015 if(bucket* bPtr = h[x][y]){ | 1016 if(bucket* bPtr = h[x][y]){ |
1016 // Check for empty row (even though row was allocated) | 1017 // Check for empty row (even though row was allocated) |
1017 #ifdef LSH_LIST_HEAD_COUNTERS | 1018 #ifdef LSH_LIST_HEAD_COUNTERS |
1045 if(colCount>maxColCount) | 1046 if(colCount>maxColCount) |
1046 maxColCount=colCount; | 1047 maxColCount=colCount; |
1047 meanColCount+=colCount; | 1048 meanColCount+=colCount; |
1048 colCountN++; | 1049 colCountN++; |
1049 } | 1050 } |
1051 H::tablesPointCount+=colCount; | |
1050 } | 1052 } |
1051 // Write END of table marker | 1053 // Write END of table marker |
1052 t1 = O2_SERIAL_TOKEN_ENDTABLE; | 1054 t1 = O2_SERIAL_TOKEN_ENDTABLE; |
1053 WRITE_UNS32(&t1,"[end]"); | 1055 WRITE_UNS32(&t1,"[end]"); |
1054 if(colCountN) | 1056 if(colCountN) |
1055 std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN | 1057 std::cout << "#points: " << H::tablesPointCount << " #rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN |
1056 << ", min = " << minColCount << ", max = " << maxColCount | 1058 << ", min = " << minColCount << ", max = " << maxColCount |
1057 << endl; | 1059 << endl; |
1058 } | 1060 } |
1059 // We're done writing | 1061 // We're done writing |
1060 return 1; | 1062 return 1; |
1341 } | 1343 } |
1342 } | 1344 } |
1343 | 1345 |
1344 void G::unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge){ | 1346 void G::unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge){ |
1345 Uns32T x=0,y=0; | 1347 Uns32T x=0,y=0; |
1346 | 1348 #ifdef _LSH_DEBUG_ |
1349 cout << "Loading hashtables..." << endl; | |
1350 cout << "header pointCount = " << pointCount << endl; | |
1351 cout << "forMerge = " << forMerge << endl; | |
1352 Uns32T sumTablesPointCount = 0; | |
1353 #endif | |
1347 // Seek to hashtable base offset | 1354 // Seek to hashtable base offset |
1348 if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){ | 1355 if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){ |
1349 fclose(dbFile); | 1356 fclose(dbFile); |
1350 error("fSeek error in unserialize_lsh_hashtables_format2"); | 1357 error("fSeek error in unserialize_lsh_hashtables_format2"); |
1351 } | 1358 } |
1352 | 1359 |
1353 // Read the hash tables into core (structure is given in header) | 1360 // Read the hash tables into core (structure is given in header) |
1354 while( x < H::L){ | 1361 while( x < H::L){ |
1362 tablesPointCount=0; | |
1355 if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){ | 1363 if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){ |
1356 fclose(dbFile); | 1364 fclose(dbFile); |
1357 error("Read error","unserialize_lsh_hashtables_format2()"); | 1365 error("Read error","unserialize_lsh_hashtables_format2()"); |
1358 } | 1366 } |
1359 if(H::t1==O2_SERIAL_TOKEN_ENDTABLE) | 1367 if(H::t1==O2_SERIAL_TOKEN_ENDTABLE) |
1393 token = unserialize_hashtable_row_format2(dbFile, h[x]+y, token); | 1401 token = unserialize_hashtable_row_format2(dbFile, h[x]+y, token); |
1394 else | 1402 else |
1395 // Use ARRAY CORE format with numElements counter | 1403 // Use ARRAY CORE format with numElements counter |
1396 token = unserialize_hashtable_row_to_array(dbFile, h[x]+y, numElements); | 1404 token = unserialize_hashtable_row_to_array(dbFile, h[x]+y, numElements); |
1397 #else | 1405 #else |
1398 token = unserialize_hashtable_row_format2(dbFile, h[x]+y); | 1406 token = unserialize_hashtable_row_format2(dbFile, h[x]+y); |
1399 #endif | 1407 #endif |
1400 // Check that token is valid | 1408 // Check that token is valid |
1401 if( !(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE) ){ | 1409 if( !(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE) ){ |
1402 fclose(dbFile); | 1410 fclose(dbFile); |
1403 error("State machine error end of row/table", "unserialize_lsh_hashtables_format2()"); | 1411 error("State machine error end of row/table", "unserialize_lsh_hashtables_format2()"); |
1409 } | 1417 } |
1410 // Check for new row flag | 1418 // Check for new row flag |
1411 if(token==O2_SERIAL_TOKEN_T1) | 1419 if(token==O2_SERIAL_TOKEN_T1) |
1412 H::t1 = token; | 1420 H::t1 = token; |
1413 } | 1421 } |
1414 } | 1422 #ifdef _LSH_DEBUG_ |
1423 cout << "table " << x << " pointCount = " << tablesPointCount << endl; | |
1424 sumTablesPointCount+=tablesPointCount; | |
1425 #endif | |
1426 } | |
1427 #ifdef _LSH_DEBUG_ | |
1428 cout << "TOTAL pointCount = " << sumTablesPointCount << endl; | |
1429 #endif | |
1415 #ifdef LSH_DUMP_CORE_TABLES | 1430 #ifdef LSH_DUMP_CORE_TABLES |
1416 dump_hashtables(); | 1431 dump_hashtables(); |
1417 #endif | 1432 #endif |
1418 } | 1433 } |
1419 | 1434 |
1449 error("Read error H::p","unserialize_hashtable_row_format2"); | 1464 error("Read error H::p","unserialize_hashtable_row_format2"); |
1450 } | 1465 } |
1451 while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){ | 1466 while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){ |
1452 pointFound=true; | 1467 pointFound=true; |
1453 bucket_insert_point(b); | 1468 bucket_insert_point(b); |
1469 tablesPointCount++; | |
1454 if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){ | 1470 if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){ |
1455 fclose(dbFile); | 1471 fclose(dbFile); |
1456 error("Read error H::p","unserialize_hashtable_row_format2"); | 1472 error("Read error H::p","unserialize_hashtable_row_format2"); |
1457 } | 1473 } |
1458 } | 1474 } |
1568 // Set the LSH_CORE_ARRAY_BIT to identify data structure for insertion and retrieval | 1584 // Set the LSH_CORE_ARRAY_BIT to identify data structure for insertion and retrieval |
1569 rowPtr->t2 |= LSH_CORE_ARRAY_BIT; | 1585 rowPtr->t2 |= LSH_CORE_ARRAY_BIT; |
1570 // Allocate a new dynamic list head at the end of the array | 1586 // Allocate a new dynamic list head at the end of the array |
1571 bucket** listPtr = reinterpret_cast<bucket**> (ap); | 1587 bucket** listPtr = reinterpret_cast<bucket**> (ap); |
1572 *listPtr = 0; | 1588 *listPtr = 0; |
1589 H::tablesPointCount += numPoints; | |
1573 // Return current token | 1590 // Return current token |
1574 return H::t2; // return H::t2 which holds current token [E or T1] | 1591 return H::t2; // return H::t2 which holds current token [E or T1] |
1575 } | 1592 } |
1576 | 1593 |
1577 | 1594 |