comparison lshlib.cpp @ 324:c93be2f3a674

Merge of branches/large_adb -r 514:524 onto the trunk. No conflicts. Added LARGE_ADB support. Turn on with --ntracks 20001 or greater. Use --adb_feature_root to locate feature files at QUERY time. Also includes a bug fix in LSH indexing, which was incorrectly thresholding large numbers of shingles.
author mas01mc
date Thu, 21 Aug 2008 21:28:33 +0000
parents 81ad865402e7
children fe4d5b763086
comparison
equal deleted inserted replaced
315:d2c56d4f841e 324:c93be2f3a674
769 std::cout.flush(); 769 std::cout.flush();
770 // memory map a single hash table for sequential access 770 // memory map a single hash table for sequential access
771 // Align each hash table to page boundary 771 // Align each hash table to page boundary
772 char* dbtable = serial_mmap(fid, hashTableSize, 1, 772 char* dbtable = serial_mmap(fid, hashTableSize, 1,
773 align_up(get_serial_hashtable_offset()+x*hashTableSize, get_page_logn())); 773 align_up(get_serial_hashtable_offset()+x*hashTableSize, get_page_logn()));
774 #ifdef __CYGWIN__
775 // No madvise in CYGWIN
776 #else
774 if(madvise(dbtable, hashTableSize, MADV_SEQUENTIAL)<0) 777 if(madvise(dbtable, hashTableSize, MADV_SEQUENTIAL)<0)
775 error("could not advise hashtable memory","","madvise"); 778 error("could not advise hashtable memory","","madvise");
776 779 #endif
777 maxColCount=0; 780 maxColCount=0;
778 minColCount=O2_SERIAL_MAX_COLS; 781 minColCount=O2_SERIAL_MAX_COLS;
779 meanColCount=0; 782 meanColCount=0;
780 colCountN=0; 783 colCountN=0;
781 pt=(SerialElementT*)dbtable; 784 pt=(SerialElementT*)dbtable;
1159 for( x = 0 ; x < H::L ; x++ ){ 1162 for( x = 0 ; x < H::L ; x++ ){
1160 // memory map a single hash table 1163 // memory map a single hash table
1161 // Align each hash table to page boundary 1164 // Align each hash table to page boundary
1162 char* dbtable = serial_mmap(fid, hashTableSize, 0, 1165 char* dbtable = serial_mmap(fid, hashTableSize, 0,
1163 align_up(get_serial_hashtable_offset()+x*hashTableSize, get_page_logn())); 1166 align_up(get_serial_hashtable_offset()+x*hashTableSize, get_page_logn()));
1167 #ifdef __CYGWIN__
1168 // No madvise in CYGWIN
1169 #else
1164 if(madvise(dbtable, hashTableSize, MADV_SEQUENTIAL)<0) 1170 if(madvise(dbtable, hashTableSize, MADV_SEQUENTIAL)<0)
1165 error("could not advise hashtable memory","","madvise"); 1171 error("could not advise hashtable memory","","madvise");
1172 #endif
1166 pt=(SerialElementT*)dbtable; 1173 pt=(SerialElementT*)dbtable;
1167 for( y = 0 ; y < H::N ; y++ ){ 1174 for( y = 0 ; y < H::N ; y++ ){
1168 // Move disk pointer to beginning of row 1175 // Move disk pointer to beginning of row
1169 pe=pt+y*lshHeader->numCols; 1176 pe=pt+y*lshHeader->numCols;
1170 unserialize_hashtable_row_format1(pe, h[x]+y); 1177 unserialize_hashtable_row_format1(pe, h[x]+y);
1329 1336
1330 for(Uns32T j=0; j<L; j++){ 1337 for(Uns32T j=0; j<L; j++){
1331 // memory map a single hash table for random access 1338 // memory map a single hash table for random access
1332 char* db = serial_mmap(dbfid, hashTableSize, 0, 1339 char* db = serial_mmap(dbfid, hashTableSize, 0,
1333 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn())); 1340 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
1341 #ifdef __CYGWIN__
1342 // No madvise in CYGWIN
1343 #else
1334 if(madvise(db, hashTableSize, MADV_RANDOM)<0) 1344 if(madvise(db, hashTableSize, MADV_RANDOM)<0)
1335 error("could not advise local hashtable memory","","madvise"); 1345 error("could not advise local hashtable memory","","madvise");
1346 #endif
1336 SerialElementT* pe = (SerialElementT*)db ; 1347 SerialElementT* pe = (SerialElementT*)db ;
1337 for(Uns32T qpos=0; qpos<vv.size(); qpos++){ 1348 for(Uns32T qpos=0; qpos<vv.size(); qpos++){
1338 H::compute_hash_functions(vv[qpos]); 1349 H::compute_hash_functions(vv[qpos]);
1339 H::generate_hash_keys(*(g+j),*(r1+j),*(r2+j)); 1350 H::generate_hash_keys(*(g+j),*(r1+j),*(r2+j));
1340 serial_bucket_chain_point(pe+t1*lshHeader->numCols, qpos); // Point to correct row 1351 serial_bucket_chain_point(pe+t1*lshHeader->numCols, qpos); // Point to correct row
1362 H::compute_hash_functions(v); 1373 H::compute_hash_functions(v);
1363 for(Uns32T j=0; j<L; j++){ 1374 for(Uns32T j=0; j<L; j++){
1364 // memory map a single hash table for random access 1375 // memory map a single hash table for random access
1365 char* db = serial_mmap(dbfid, hashTableSize, 0, 1376 char* db = serial_mmap(dbfid, hashTableSize, 0,
1366 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn())); 1377 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
1378 #ifdef __CYGWIN__
1379 // No madvise in CYGWIN
1380 #else
1367 if(madvise(db, hashTableSize, MADV_RANDOM)<0) 1381 if(madvise(db, hashTableSize, MADV_RANDOM)<0)
1368 error("could not advise local hashtable memory","","madvise"); 1382 error("could not advise local hashtable memory","","madvise");
1383 #endif
1369 SerialElementT* pe = (SerialElementT*)db ; 1384 SerialElementT* pe = (SerialElementT*)db ;
1370 H::generate_hash_keys(*(g+j),*(r1+j),*(r2+j)); 1385 H::generate_hash_keys(*(g+j),*(r1+j),*(r2+j));
1371 serial_bucket_chain_point(pe+t1*lshHeader->numCols, qpos); // Point to correct row 1386 serial_bucket_chain_point(pe+t1*lshHeader->numCols, qpos); // Point to correct row
1372 serial_munmap(db, hashTableSize); // drop hashtable mmap 1387 serial_munmap(db, hashTableSize); // drop hashtable mmap
1373 } 1388 }
1382 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols; 1397 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
1383 for(Uns32T j=0; j<L; j++){ 1398 for(Uns32T j=0; j<L; j++){
1384 // memory map a single hash table for random access 1399 // memory map a single hash table for random access
1385 char* db = serial_mmap(dbfid, hashTableSize, 0, 1400 char* db = serial_mmap(dbfid, hashTableSize, 0,
1386 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn())); 1401 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
1402 #ifdef __CYGWIN__
1403 // No madvise in CYGWIN
1404 #else
1387 if(madvise(db, hashTableSize, MADV_SEQUENTIAL)<0) 1405 if(madvise(db, hashTableSize, MADV_SEQUENTIAL)<0)
1388 error("could not advise local hashtable memory","","madvise"); 1406 error("could not advise local hashtable memory","","madvise");
1407 #endif
1389 SerialElementT* pe = (SerialElementT*)db ; 1408 SerialElementT* pe = (SerialElementT*)db ;
1390 printf("*********** TABLE %d ***************\n", j); 1409 printf("*********** TABLE %d ***************\n", j);
1391 fflush(stdout); 1410 fflush(stdout);
1392 int count=0; 1411 int count=0;
1393 do{ 1412 do{