Mercurial > hg > audiodb
comparison audioDB.cpp @ 187:530154ca4cf5 no-big-mmap
Wow, two changes for the price of one
* remove dataBuf usage from trackPointQuery()
* make the restrict-list tests pass:
** lseek() to the right place if we're not actually processing tracks in
sequential order;
** deal with the off-by-one error in reading in lines from trackFile.
author | mas01cr |
---|---|
date | Fri, 16 Nov 2007 16:31:36 +0000 |
parents | ae212368a874 |
children | 0caa733d48c5 |
comparison
equal
deleted
inserted
replaced
186:d5ae11d6cd2c | 187:530154ca4cf5 |
---|---|
597 unsigned insertoffset = dbH->length;// Store current state | 597 unsigned insertoffset = dbH->length;// Store current state |
598 | 598 |
599 // Check times status and insert times from file | 599 // Check times status and insert times from file |
600 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); | 600 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); |
601 double* timesdata=timesTable+timesoffset; | 601 double* timesdata=timesTable+timesoffset; |
602 assert(timesdata+numVectors<l2normTable); | 602 /* FIXME: work out how to check against wandering off the end of the |
603 times table. | |
604 | |
605 assert(timesdata+numVectors<l2normTable); | |
606 */ | |
603 insertTimeStamps(numVectors, timesFile, timesdata); | 607 insertTimeStamps(numVectors, timesFile, timesdata); |
604 | 608 |
605 // Increment file count | 609 // Increment file count |
606 dbH->numFiles++; | 610 dbH->numFiles++; |
607 | 611 |
1344 query=query+queryPoint*dbH->dim; | 1348 query=query+queryPoint*dbH->dim; |
1345 numVectors=queryPoint+1; | 1349 numVectors=queryPoint+1; |
1346 } | 1350 } |
1347 | 1351 |
1348 // build track offset table | 1352 // build track offset table |
1349 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; | 1353 off_t *trackOffsetTable = new off_t[dbH->numFiles]; |
1350 unsigned cumTrack=0; | 1354 unsigned cumTrack=0; |
1351 unsigned trackIndexOffset; | 1355 off_t trackIndexOffset; |
1352 for(k=0; k<dbH->numFiles;k++){ | 1356 for(k=0; k<dbH->numFiles;k++){ |
1353 trackOffsetTable[k]=cumTrack; | 1357 trackOffsetTable[k]=cumTrack; |
1354 cumTrack+=trackTable[k]*dbH->dim; | 1358 cumTrack+=trackTable[k]*dbH->dim; |
1355 } | 1359 } |
1356 | 1360 |
1357 char nextKey[MAXSTR]; | 1361 char nextKey[MAXSTR]; |
1358 | 1362 |
1359 gettimeofday(&tv1, NULL); | 1363 gettimeofday(&tv1, NULL); |
1364 | |
1365 size_t data_buffer_size = 0; | |
1366 double *data_buffer = 0; | |
1367 lseek(dbfid, dbH->dataOffset, SEEK_SET); | |
1360 | 1368 |
1361 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ | 1369 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ |
1362 if(trackFile){ | 1370 |
1363 if(!trackFile->eof()){ | 1371 trackOffset = trackOffsetTable[track]; // numDoubles offset |
1364 trackFile->getline(nextKey,MAXSTR); | 1372 |
1365 track=getKeyPos(nextKey); | 1373 // get trackID from file if using a control file |
1366 } | 1374 if(trackFile) { |
1367 else | 1375 trackFile->getline(nextKey,MAXSTR); |
1376 if(!trackFile->eof()) { | |
1377 track = getKeyPos(nextKey); | |
1378 trackOffset = trackOffsetTable[track]; | |
1379 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); | |
1380 } else { | |
1368 break; | 1381 break; |
1369 } | 1382 } |
1370 trackOffset=trackOffsetTable[track]; // numDoubles offset | 1383 } |
1384 | |
1371 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset | 1385 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset |
1386 | |
1372 if(verbosity>7) { | 1387 if(verbosity>7) { |
1373 cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush(); | 1388 cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush(); |
1374 } | 1389 } |
1375 | 1390 |
1376 if(dbH->flags & O2_FLAG_L2NORM) | 1391 if(dbH->flags & O2_FLAG_L2NORM) |
1379 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); | 1394 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); |
1380 if(usingQueryPoint) | 1395 if(usingQueryPoint) |
1381 j=1; | 1396 j=1; |
1382 else | 1397 else |
1383 j=numVectors; | 1398 j=numVectors; |
1399 | |
1400 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) { | |
1401 if(data_buffer) { | |
1402 free(data_buffer); | |
1403 } | |
1404 { | |
1405 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; | |
1406 void *tmp = malloc(data_buffer_size); | |
1407 if (tmp == NULL) { | |
1408 error("error allocating data buffer"); | |
1409 } | |
1410 data_buffer = (double *) tmp; | |
1411 } | |
1412 } | |
1413 | |
1414 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); | |
1415 | |
1384 while(j--){ | 1416 while(j--){ |
1385 k=trackTable[track]; // number of vectors in track | 1417 k=trackTable[track]; // number of vectors in track |
1386 data=dataBuf+trackOffset; // data for track | 1418 data=data_buffer; // data for track |
1387 while(k--){ | 1419 while(k--){ |
1388 thisDist=0; | 1420 thisDist=0; |
1389 l=dbH->dim; | 1421 l=dbH->dim; |
1390 double* q=query; | 1422 double* q=query; |
1391 while(l--) | 1423 while(l--) |
1451 distances[k]=-DBL_MAX; | 1483 distances[k]=-DBL_MAX; |
1452 qIndexes[k]=~0; | 1484 qIndexes[k]=~0; |
1453 sIndexes[k]=~0; | 1485 sIndexes[k]=~0; |
1454 } | 1486 } |
1455 } // tracks | 1487 } // tracks |
1488 | |
1489 free(data_buffer); | |
1490 | |
1456 gettimeofday(&tv2, NULL); | 1491 gettimeofday(&tv2, NULL); |
1457 | 1492 |
1458 if(verbosity>1) { | 1493 if(verbosity>1) { |
1459 cerr << endl << "processed tracks :" << processedTracks | 1494 cerr << endl << "processed tracks :" << processedTracks |
1460 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | 1495 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; |
1736 double* qp; | 1771 double* qp; |
1737 double* sp; | 1772 double* sp; |
1738 double* dp; | 1773 double* dp; |
1739 | 1774 |
1740 // build track offset table | 1775 // build track offset table |
1741 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; | 1776 off_t *trackOffsetTable = new off_t[dbH->numFiles]; |
1742 unsigned cumTrack=0; | 1777 unsigned cumTrack=0; |
1743 unsigned trackIndexOffset; | 1778 off_t trackIndexOffset; |
1744 for(k=0; k<dbH->numFiles;k++){ | 1779 for(k=0; k<dbH->numFiles;k++){ |
1745 trackOffsetTable[k]=cumTrack; | 1780 trackOffsetTable[k]=cumTrack; |
1746 cumTrack+=trackTable[k]*dbH->dim; | 1781 cumTrack+=trackTable[k]*dbH->dim; |
1747 } | 1782 } |
1748 | 1783 |
1760 double *data_buffer = 0; | 1795 double *data_buffer = 0; |
1761 lseek(dbfid, dbH->dataOffset, SEEK_SET); | 1796 lseek(dbfid, dbH->dataOffset, SEEK_SET); |
1762 | 1797 |
1763 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) { | 1798 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) { |
1764 | 1799 |
1800 trackOffset = trackOffsetTable[track]; // numDoubles offset | |
1801 | |
1765 // get trackID from file if using a control file | 1802 // get trackID from file if using a control file |
1766 if(trackFile){ | 1803 if(trackFile) { |
1767 if(!trackFile->eof()){ | 1804 trackFile->getline(nextKey,MAXSTR); |
1768 trackFile->getline(nextKey,MAXSTR); | 1805 if(!trackFile->eof()) { |
1769 track=getKeyPos(nextKey); | 1806 track = getKeyPos(nextKey); |
1770 } | 1807 trackOffset = trackOffsetTable[track]; |
1771 else | 1808 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); |
1809 } else { | |
1772 break; | 1810 break; |
1773 } | 1811 } |
1774 | 1812 } |
1775 trackOffset=trackOffsetTable[track]; // numDoubles offset | 1813 |
1776 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset | 1814 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset |
1777 | 1815 |
1778 if(sequenceLength<=trackTable[track]){ // test for short sequences | 1816 if(sequenceLength<=trackTable[track]){ // test for short sequences |
1779 | 1817 |
1780 if(verbosity>7) { | 1818 if(verbosity>7) { |
2262 double* qp; | 2300 double* qp; |
2263 double* sp; | 2301 double* sp; |
2264 double* dp; | 2302 double* dp; |
2265 | 2303 |
2266 // build track offset table | 2304 // build track offset table |
2267 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; | 2305 off_t *trackOffsetTable = new off_t[dbH->numFiles]; |
2268 unsigned cumTrack=0; | 2306 unsigned cumTrack=0; |
2269 unsigned trackIndexOffset; | 2307 off_t trackIndexOffset; |
2270 for(k=0; k<dbH->numFiles;k++){ | 2308 for(k=0; k<dbH->numFiles;k++){ |
2271 trackOffsetTable[k]=cumTrack; | 2309 trackOffsetTable[k]=cumTrack; |
2272 cumTrack+=trackTable[k]*dbH->dim; | 2310 cumTrack+=trackTable[k]*dbH->dim; |
2273 } | 2311 } |
2274 | 2312 |
2286 double *data_buffer = 0; | 2324 double *data_buffer = 0; |
2287 lseek(dbfid, dbH->dataOffset, SEEK_SET); | 2325 lseek(dbfid, dbH->dataOffset, SEEK_SET); |
2288 | 2326 |
2289 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ | 2327 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ |
2290 | 2328 |
2329 trackOffset = trackOffsetTable[track]; // numDoubles offset | |
2330 | |
2291 // get trackID from file if using a control file | 2331 // get trackID from file if using a control file |
2292 if(trackFile){ | 2332 if(trackFile) { |
2293 if(!trackFile->eof()){ | 2333 trackFile->getline(nextKey,MAXSTR); |
2294 trackFile->getline(nextKey,MAXSTR); | 2334 if(!trackFile->eof()) { |
2295 track=getKeyPos(nextKey); | 2335 track = getKeyPos(nextKey); |
2296 } | 2336 trackOffset = trackOffsetTable[track]; |
2297 else | 2337 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); |
2338 } else { | |
2298 break; | 2339 break; |
2299 } | 2340 } |
2300 | 2341 } |
2301 trackOffset=trackOffsetTable[track]; // numDoubles offset | 2342 |
2302 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset | 2343 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset |
2303 | 2344 |
2304 if(sequenceLength<=trackTable[track]){ // test for short sequences | 2345 if(sequenceLength<=trackTable[track]){ // test for short sequences |
2305 | 2346 |
2306 if(verbosity>7) { | 2347 if(verbosity>7) { |