comparison audioDB.cpp @ 187:530154ca4cf5 no-big-mmap

Wow, two changes for the price of one:
* remove dataBuf usage from trackPointQuery();
* make the restrict-list tests pass:
  ** lseek() to the right place if we're not actually processing tracks in sequential order;
  ** deal with the off-by-one error in reading lines from trackFile.
author mas01cr
date Fri, 16 Nov 2007 16:31:36 +0000
parents ae212368a874
children 0caa733d48c5
comparison
equal deleted inserted replaced
186:d5ae11d6cd2c 187:530154ca4cf5
597 unsigned insertoffset = dbH->length;// Store current state 597 unsigned insertoffset = dbH->length;// Store current state
598 598
599 // Check times status and insert times from file 599 // Check times status and insert times from file
600 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); 600 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
601 double* timesdata=timesTable+timesoffset; 601 double* timesdata=timesTable+timesoffset;
602 assert(timesdata+numVectors<l2normTable); 602 /* FIXME: work out how to check against wandering off the end of the
603 times table.
604
605 assert(timesdata+numVectors<l2normTable);
606 */
603 insertTimeStamps(numVectors, timesFile, timesdata); 607 insertTimeStamps(numVectors, timesFile, timesdata);
604 608
605 // Increment file count 609 // Increment file count
606 dbH->numFiles++; 610 dbH->numFiles++;
607 611
1344 query=query+queryPoint*dbH->dim; 1348 query=query+queryPoint*dbH->dim;
1345 numVectors=queryPoint+1; 1349 numVectors=queryPoint+1;
1346 } 1350 }
1347 1351
1348 // build track offset table 1352 // build track offset table
1349 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; 1353 off_t *trackOffsetTable = new off_t[dbH->numFiles];
1350 unsigned cumTrack=0; 1354 unsigned cumTrack=0;
1351 unsigned trackIndexOffset; 1355 off_t trackIndexOffset;
1352 for(k=0; k<dbH->numFiles;k++){ 1356 for(k=0; k<dbH->numFiles;k++){
1353 trackOffsetTable[k]=cumTrack; 1357 trackOffsetTable[k]=cumTrack;
1354 cumTrack+=trackTable[k]*dbH->dim; 1358 cumTrack+=trackTable[k]*dbH->dim;
1355 } 1359 }
1356 1360
1357 char nextKey[MAXSTR]; 1361 char nextKey[MAXSTR];
1358 1362
1359 gettimeofday(&tv1, NULL); 1363 gettimeofday(&tv1, NULL);
1364
1365 size_t data_buffer_size = 0;
1366 double *data_buffer = 0;
1367 lseek(dbfid, dbH->dataOffset, SEEK_SET);
1360 1368
1361 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ 1369 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
1362 if(trackFile){ 1370
1363 if(!trackFile->eof()){ 1371 trackOffset = trackOffsetTable[track]; // numDoubles offset
1364 trackFile->getline(nextKey,MAXSTR); 1372
1365 track=getKeyPos(nextKey); 1373 // get trackID from file if using a control file
1366 } 1374 if(trackFile) {
1367 else 1375 trackFile->getline(nextKey,MAXSTR);
1376 if(!trackFile->eof()) {
1377 track = getKeyPos(nextKey);
1378 trackOffset = trackOffsetTable[track];
1379 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
1380 } else {
1368 break; 1381 break;
1369 } 1382 }
1370 trackOffset=trackOffsetTable[track]; // numDoubles offset 1383 }
1384
1371 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset 1385 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
1386
1372 if(verbosity>7) { 1387 if(verbosity>7) {
1373 cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush(); 1388 cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush();
1374 } 1389 }
1375 1390
1376 if(dbH->flags & O2_FLAG_L2NORM) 1391 if(dbH->flags & O2_FLAG_L2NORM)
1379 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); 1394 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int));
1380 if(usingQueryPoint) 1395 if(usingQueryPoint)
1381 j=1; 1396 j=1;
1382 else 1397 else
1383 j=numVectors; 1398 j=numVectors;
1399
1400 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
1401 if(data_buffer) {
1402 free(data_buffer);
1403 }
1404 {
1405 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
1406 void *tmp = malloc(data_buffer_size);
1407 if (tmp == NULL) {
1408 error("error allocating data buffer");
1409 }
1410 data_buffer = (double *) tmp;
1411 }
1412 }
1413
1414 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
1415
1384 while(j--){ 1416 while(j--){
1385 k=trackTable[track]; // number of vectors in track 1417 k=trackTable[track]; // number of vectors in track
1386 data=dataBuf+trackOffset; // data for track 1418 data=data_buffer; // data for track
1387 while(k--){ 1419 while(k--){
1388 thisDist=0; 1420 thisDist=0;
1389 l=dbH->dim; 1421 l=dbH->dim;
1390 double* q=query; 1422 double* q=query;
1391 while(l--) 1423 while(l--)
1451 distances[k]=-DBL_MAX; 1483 distances[k]=-DBL_MAX;
1452 qIndexes[k]=~0; 1484 qIndexes[k]=~0;
1453 sIndexes[k]=~0; 1485 sIndexes[k]=~0;
1454 } 1486 }
1455 } // tracks 1487 } // tracks
1488
1489 free(data_buffer);
1490
1456 gettimeofday(&tv2, NULL); 1491 gettimeofday(&tv2, NULL);
1457 1492
1458 if(verbosity>1) { 1493 if(verbosity>1) {
1459 cerr << endl << "processed tracks :" << processedTracks 1494 cerr << endl << "processed tracks :" << processedTracks
1460 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; 1495 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
1736 double* qp; 1771 double* qp;
1737 double* sp; 1772 double* sp;
1738 double* dp; 1773 double* dp;
1739 1774
1740 // build track offset table 1775 // build track offset table
1741 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; 1776 off_t *trackOffsetTable = new off_t[dbH->numFiles];
1742 unsigned cumTrack=0; 1777 unsigned cumTrack=0;
1743 unsigned trackIndexOffset; 1778 off_t trackIndexOffset;
1744 for(k=0; k<dbH->numFiles;k++){ 1779 for(k=0; k<dbH->numFiles;k++){
1745 trackOffsetTable[k]=cumTrack; 1780 trackOffsetTable[k]=cumTrack;
1746 cumTrack+=trackTable[k]*dbH->dim; 1781 cumTrack+=trackTable[k]*dbH->dim;
1747 } 1782 }
1748 1783
1760 double *data_buffer = 0; 1795 double *data_buffer = 0;
1761 lseek(dbfid, dbH->dataOffset, SEEK_SET); 1796 lseek(dbfid, dbH->dataOffset, SEEK_SET);
1762 1797
1763 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) { 1798 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) {
1764 1799
1800 trackOffset = trackOffsetTable[track]; // numDoubles offset
1801
1765 // get trackID from file if using a control file 1802 // get trackID from file if using a control file
1766 if(trackFile){ 1803 if(trackFile) {
1767 if(!trackFile->eof()){ 1804 trackFile->getline(nextKey,MAXSTR);
1768 trackFile->getline(nextKey,MAXSTR); 1805 if(!trackFile->eof()) {
1769 track=getKeyPos(nextKey); 1806 track = getKeyPos(nextKey);
1770 } 1807 trackOffset = trackOffsetTable[track];
1771 else 1808 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
1809 } else {
1772 break; 1810 break;
1773 } 1811 }
1774 1812 }
1775 trackOffset=trackOffsetTable[track]; // numDoubles offset 1813
1776 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset 1814 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
1777 1815
1778 if(sequenceLength<=trackTable[track]){ // test for short sequences 1816 if(sequenceLength<=trackTable[track]){ // test for short sequences
1779 1817
1780 if(verbosity>7) { 1818 if(verbosity>7) {
2262 double* qp; 2300 double* qp;
2263 double* sp; 2301 double* sp;
2264 double* dp; 2302 double* dp;
2265 2303
2266 // build track offset table 2304 // build track offset table
2267 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; 2305 off_t *trackOffsetTable = new off_t[dbH->numFiles];
2268 unsigned cumTrack=0; 2306 unsigned cumTrack=0;
2269 unsigned trackIndexOffset; 2307 off_t trackIndexOffset;
2270 for(k=0; k<dbH->numFiles;k++){ 2308 for(k=0; k<dbH->numFiles;k++){
2271 trackOffsetTable[k]=cumTrack; 2309 trackOffsetTable[k]=cumTrack;
2272 cumTrack+=trackTable[k]*dbH->dim; 2310 cumTrack+=trackTable[k]*dbH->dim;
2273 } 2311 }
2274 2312
2286 double *data_buffer = 0; 2324 double *data_buffer = 0;
2287 lseek(dbfid, dbH->dataOffset, SEEK_SET); 2325 lseek(dbfid, dbH->dataOffset, SEEK_SET);
2288 2326
2289 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ 2327 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
2290 2328
2329 trackOffset = trackOffsetTable[track]; // numDoubles offset
2330
2291 // get trackID from file if using a control file 2331 // get trackID from file if using a control file
2292 if(trackFile){ 2332 if(trackFile) {
2293 if(!trackFile->eof()){ 2333 trackFile->getline(nextKey,MAXSTR);
2294 trackFile->getline(nextKey,MAXSTR); 2334 if(!trackFile->eof()) {
2295 track=getKeyPos(nextKey); 2335 track = getKeyPos(nextKey);
2296 } 2336 trackOffset = trackOffsetTable[track];
2297 else 2337 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
2338 } else {
2298 break; 2339 break;
2299 } 2340 }
2300 2341 }
2301 trackOffset=trackOffsetTable[track]; // numDoubles offset 2342
2302 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset 2343 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
2303 2344
2304 if(sequenceLength<=trackTable[track]){ // test for short sequences 2345 if(sequenceLength<=trackTable[track]){ // test for short sequences
2305 2346
2306 if(verbosity>7) { 2347 if(verbosity>7) {