comparison audioDB.cpp @ 199:72a037f2b1e4

Alter timesTable handling. Instead of inserting N boundary points per track, insert N (start,end) pairs. Since N boundaries define only N-1 intervals, we were losing information on insert -- this means that dump/restore cycles of existing times-enabled databases WILL NOT WORK. The input format for times files is still the same, albeit a little tighter: there must be exactly N+1 floats in the file.
- make times table default length twice as big (done!)
- ensure exactly n+1 times read in from timesFile (done!)
- store (beginning, end) in times table (done!)
- update file format version (done!)
- ensure dump writes exactly n+1 times (done!)
- adjust uses of timesTable to read at 2k and 2k+1 (done!)
- timesdata[x] in sequence query is duration of sequence of appropriate length from point [x] (done!)
- dbdurs calculation (done!)
- don't decrement j when using times in sequence query (done!)
- tests! (NOT DONE)
author mas01cr
date Fri, 23 Nov 2007 15:43:51 +0000
parents 8c81cacf5aab
children 7c9feaceeab5
comparison
equal deleted inserted replaced
198:e21cc48ddf4d 199:72a037f2b1e4
393 393
394 retry: 394 retry:
395 do { 395 do {
396 status = fcntl(fd, F_SETLKW, &lock); 396 status = fcntl(fd, F_SETLKW, &lock);
397 } while (status != 0 && errno == EINTR); 397 } while (status != 0 && errno == EINTR);
398 398
399 if (status) { 399 if (status) {
400 if (errno == EAGAIN) { 400 if (errno == EAGAIN) {
401 sleep(1); 401 sleep(1);
402 goto retry; 402 goto retry;
403 } else { 403 } else {
428 * a header (see dbTableHeader struct definition); 428 * a header (see dbTableHeader struct definition);
429 * keyTable: list of keys of tracks; 429 * keyTable: list of keys of tracks;
430 * trackTable: Maps implicit feature index to a feature vector 430 * trackTable: Maps implicit feature index to a feature vector
431 matrix (sizes of tracks) 431 matrix (sizes of tracks)
432 * featureTable: Lots of doubles; 432 * featureTable: Lots of doubles;
433 * timesTable: time points for each feature vector; 433 * timesTable: (start,end) time points for each feature vector;
434 * powerTable: associated power for each feature vector;
434 * l2normTable: squared l2norms for each feature vector. 435 * l2normTable: squared l2norms for each feature vector.
435 */ 436 */
436 void audioDB::create(const char* dbName){ 437 void audioDB::create(const char* dbName){
437 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) 438 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0)
438 error("Can't create database file", dbName, "open"); 439 error("Can't create database file", dbName, "open");
457 dbH->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE); 458 dbH->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE);
458 dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles); 459 dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles);
459 dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles); 460 dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles);
460 dbH->l2normTableOffset = ALIGN_PAGE_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double)); 461 dbH->l2normTableOffset = ALIGN_PAGE_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double));
461 dbH->powerTableOffset = ALIGN_PAGE_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double)); 462 dbH->powerTableOffset = ALIGN_PAGE_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double));
462 dbH->timesTableOffset = ALIGN_PAGE_DOWN(dbH->powerTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double)); 463 dbH->timesTableOffset = ALIGN_PAGE_DOWN(dbH->powerTableOffset - 2*maxfiles*O2_MEANNUMVECTORS*sizeof(double));
463 dbH->dbSize = size; 464 dbH->dbSize = size;
464 465
465 write(dbfid, dbH, O2_HEADERSIZE); 466 write(dbfid, dbH, O2_HEADERSIZE);
466 467
467 // go to the location corresponding to the last byte 468 // go to the location corresponding to the last byte
531 l2normTableLength = dbH->dbSize - dbH->l2normTableOffset; 532 l2normTableLength = dbH->dbSize - dbH->l2normTableOffset;
532 } else { 533 } else {
533 fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLESIZE); 534 fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLESIZE);
534 trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLESIZE); 535 trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLESIZE);
535 dataBufLength = ALIGN_PAGE_UP(dbH->length); 536 dataBufLength = ALIGN_PAGE_UP(dbH->length);
536 timesTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim); 537 timesTableLength = ALIGN_PAGE_UP(2*(dbH->length / dbH->dim));
537 powerTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim); 538 powerTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim);
538 l2normTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim); 539 l2normTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim);
539 } 540 }
540 CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, fileTableLength); 541 CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, fileTableLength);
541 CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, trackTableLength); 542 CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, trackTableLength);
654 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key)); 655 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));
655 656
656 off_t insertoffset = dbH->length;// Store current state 657 off_t insertoffset = dbH->length;// Store current state
657 658
658 // Check times status and insert times from file 659 // Check times status and insert times from file
659 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); 660 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
660 double* timesdata=timesTable+timesoffset; 661 double *timesdata = timesTable + 2*indexoffset;
661 662
662 if(timesoffset + numVectors > timesTableLength) { 663 if(2*(indexoffset + numVectors) > timesTableLength) {
663 error("out of space for times", key); 664 error("out of space for times", key);
664 } 665 }
665 666
666 insertTimeStamps(numVectors, timesFile, timesdata); 667 if (usingTimes) {
667 668 insertTimeStamps(numVectors, timesFile, timesdata);
668 double *powerdata = powerTable + timesoffset; 669 }
670
671 double *powerdata = powerTable + indexoffset;
669 insertPowerData(numVectors, powerfd, powerdata); 672 insertPowerData(numVectors, powerfd, powerdata);
670 673
671 // Increment file count 674 // Increment file count
672 dbH->numFiles++; 675 dbH->numFiles++;
673 676
674 // Update Header information 677 // Update Header information
675 dbH->length+=(statbuf.st_size-sizeof(int)); 678 dbH->length+=(statbuf.st_size-sizeof(int));
676 679
677 // Update track to file index map 680 // Update track to file index map
678 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); 681 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
679 682
680 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int)); 683 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
681 684
682 // Norm the vectors on input if the database is already L2 normed 685 // Norm the vectors on input if the database is already L2 normed
683 if(dbH->flags & O2_FLAG_L2NORM) 686 if(dbH->flags & O2_FLAG_L2NORM)
696 // CLEAN UP 699 // CLEAN UP
697 munmap(indata,statbuf.st_size); 700 munmap(indata,statbuf.st_size);
698 close(infid); 701 close(infid);
699 } 702 }
700 703
701 void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){ 704 void audioDB::insertTimeStamps(unsigned numVectors, ifstream *timesFile, double *timesdata) {
702 unsigned numtimes=0; 705 assert(usingTimes);
703 if(usingTimes){ 706
704 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) 707 unsigned numtimes = 0;
705 dbH->flags=dbH->flags|O2_FLAG_TIMES; 708
706 else if(!(dbH->flags&O2_FLAG_TIMES)){ 709 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
707 cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl; 710 dbH->flags=dbH->flags|O2_FLAG_TIMES;
708 usingTimes=0; 711 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
709 } 712 error("Timestamp file used with non-timestamped database", timesFileName);
713 }
710 714
711 if(!timesFile->is_open()){ 715 if(!timesFile->is_open()) {
712 if(dbH->flags & O2_FLAG_TIMES){ 716 error("problem opening times file on timestamped database", timesFileName);
713 munmap(indata,statbuf.st_size); 717 }
714 munmap(db,dbH->dbSize); 718
715 error("problem opening times file on timestamped database",timesFileName); 719 double timepoint, next;
716 } 720 *timesFile >> timepoint;
717 else{ 721 if (timesFile->eof()) {
718 cerr << "Warning: problem opening times file. But non-timestamped database, so ignoring times file." << endl; 722 error("no entries in times file", timesFileName);
719 usingTimes=0; 723 }
720 } 724 numtimes++;
721 } 725 do {
722 726 *timesFile >> next;
723 // Process time file 727 if (timesFile->eof()) {
724 if(usingTimes){ 728 break;
725 do{ 729 }
726 *timesFile>>*timesdata++; 730 numtimes++;
727 if(timesFile->eof()) 731 timesdata[0] = timepoint;
728 break; 732 timepoint = (timesdata[1] = next);
729 numtimes++; 733 timesdata += 2;
730 }while(!timesFile->eof() && numtimes<numVectors); 734 } while (numtimes < numVectors + 1);
731 if(!timesFile->eof()){ 735
732 double dummy; 736 if (numtimes < numVectors + 1) {
733 do{ 737 error("too few timepoints in times file", timesFileName);
734 *timesFile>>dummy; 738 }
735 if(timesFile->eof()) 739
736 break; 740 *timesFile >> next;
737 numtimes++; 741 if (!timesFile->eof()) {
738 }while(!timesFile->eof()); 742 error("too many timepoints in times file", timesFileName);
739 } 743 }
740 if(numtimes<numVectors || numtimes>numVectors+2){
741 munmap(indata,statbuf.st_size);
742 munmap(db,dbH->dbSize);
743 close(infid);
744 cerr << "expected " << numVectors << " found " << numtimes << endl;
745 error("Times file is incorrect length for features file",inFile);
746 }
747 if(verbosity>2) {
748 cerr << "numtimes: " << numtimes << endl;
749 }
750 }
751 }
752 } 744 }
753 745
754 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) { 746 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
755 if (usingPower) { 747 if (usingPower) {
756 if (!(dbH->flags & O2_FLAG_POWER)) { 748 if (!(dbH->flags & O2_FLAG_POWER)) {
850 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; 842 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl;
851 } 843 }
852 } 844 }
853 else{ 845 else{
854 if(usingTimes){ 846 if(usingTimes){
855 if(timesFile->eof()) 847 if(timesFile->eof()) {
856 error("not enough timestamp files in timesList"); 848 error("not enough timestamp files in timesList", timesFileName);
857 thisTimesFile=new ifstream(thisTimesFileName,ios::in); 849 }
858 if(!thisTimesFile->is_open()) 850 thisTimesFile = new ifstream(thisTimesFileName,ios::in);
859 error("Cannot open timestamp file",thisTimesFileName); 851 if(!thisTimesFile->is_open()) {
860 off_t insertoffset=dbH->length; 852 error("Cannot open timestamp file", thisTimesFileName);
861 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); 853 }
862 double* timesdata=timesTable+timesoffset; 854 off_t insertoffset = dbH->length;
863 if(timesoffset + numVectors > timesTableLength) { 855 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
856 double *timesdata = timesTable + 2*indexoffset;
857 if(2*(indexoffset + numVectors) > timesTableLength) {
864 error("out of space for times", key); 858 error("out of space for times", key);
865 } 859 }
866 insertTimeStamps(numVectors,thisTimesFile,timesdata); 860 insertTimeStamps(numVectors, thisTimesFile, timesdata);
867 if(thisTimesFile) 861 if(thisTimesFile)
868 delete thisTimesFile; 862 delete thisTimesFile;
869 } 863 }
870 864
871 if (usingPower) { 865 if (usingPower) {
1104 free(data_buffer); 1098 free(data_buffer);
1105 1099
1106 fprintf(fLFile, "%s\n", fName); 1100 fprintf(fLFile, "%s\n", fName);
1107 close(ffd); 1101 close(ffd);
1108 1102
1109 if(times) { 1103 if (times) {
1110 snprintf(fName, 256, "%05d.times", k); 1104 snprintf(fName, 256, "%05d.times", k);
1111 tFile = fopen(fName, "w"); 1105 tFile = fopen(fName, "w");
1112 for(unsigned i = 0; i < trackTable[k]; i++) { 1106 for(unsigned i = 0; i < trackTable[k]; i++) {
1113 // KLUDGE: specifying 16 digits of precision after the decimal 1107 // KLUDGE: specifying 16 digits of precision after the decimal
1114 // point is (but check this!) sufficient to uniquely identify 1108 // point is (but check this!) sufficient to uniquely identify
1115 // doubles; however, that will cause ugliness, as that's 1109 // doubles; however, that will cause ugliness, as that's
1116 // vastly too many for most values of interest. Moving to %a 1110 // vastly too many for most values of interest. Moving to %a
1117 // here and scanf() in the timesFile reading might fix this. 1111 // here and scanf() in the timesFile reading might fix this.
1118 // -- CSR, 2007-10-19 1112 // -- CSR, 2007-10-19
1119 fprintf(tFile, "%.16e\n", *(timesTable + pos + i)); 1113 fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*i));
1120 } 1114 }
1115 fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*trackTable[k]-1));
1116
1121 fprintf(tLFile, "%s\n", fName); 1117 fprintf(tLFile, "%s\n", fName);
1122 } 1118 }
1123 1119
1124 if (power) { 1120 if (power) {
1125 uint32_t one = 1; 1121 uint32_t one = 1;
1290 unsigned k,l,n; 1286 unsigned k,l,n;
1291 double thisDist; 1287 double thisDist;
1292 1288
1293 unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double)); 1289 unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double));
1294 double meanQdur = 0; 1290 double meanQdur = 0;
1295 double* timesdata = 0; 1291 double *timesdata = 0;
1296 double* dbdurs = 0; 1292 double *querydurs = 0;
1293 double *dbdurs = 0;
1297 1294
1298 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ 1295 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
1299 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; 1296 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
1300 usingTimes=0; 1297 usingTimes=0;
1301 } 1298 }
1302 1299
1303 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 1300 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
1304 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; 1301 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
1305 1302
1306 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ 1303 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
1307 timesdata = new double[numVectors]; 1304 timesdata = new double[2*numVectors];
1305 querydurs = new double[numVectors];
1308 insertTimeStamps(numVectors, timesFile, timesdata); 1306 insertTimeStamps(numVectors, timesFile, timesdata);
1309 // Calculate durations of points 1307 // Calculate durations of points
1310 for(k=0; k<numVectors-1; k++){ 1308 for(k=0; k<numVectors-1; k++){
1311 timesdata[k]=timesdata[k+1]-timesdata[k]; 1309 querydurs[k]=timesdata[2*k+1]-timesdata[2*k];
1312 meanQdur+=timesdata[k]; 1310 meanQdur+=querydurs[k];
1313 } 1311 }
1314 meanQdur/=k; 1312 meanQdur/=k;
1315 // Individual exhaustive timepoint durations 1313 // Individual exhaustive timepoint durations
1316 dbdurs = new double[totalVecs]; 1314 dbdurs = new double[totalVecs];
1317 for(k=0; k<totalVecs-1; k++) 1315 for(k=0; k<totalVecs-1; k++) {
1318 dbdurs[k]=timesTable[k+1]-timesTable[k]; 1316 dbdurs[k]=timesTable[2*k+1]-timesTable[2*k];
1319 j--; // decrement vector counter by one 1317 }
1320 } 1318 }
1321 1319
1322 if(usingQueryPoint) 1320 if(usingQueryPoint)
1323 if(queryPoint>numVectors-1) 1321 if(queryPoint>numVectors-1)
1324 error("queryPoint > numVectors in query"); 1322 error("queryPoint > numVectors in query");
1341 double* q=query; 1339 double* q=query;
1342 while(l--) 1340 while(l--)
1343 thisDist+=*q++**data++; 1341 thisDist+=*q++**data++;
1344 if(!usingTimes || 1342 if(!usingTimes ||
1345 (usingTimes 1343 (usingTimes
1346 && fabs(dbdurs[totalVecs-k-1]-timesdata[numVectors-j-1])<timesdata[numVectors-j-1]*timesTol)){ 1344 && fabs(dbdurs[totalVecs-k-1]-querydurs[numVectors-j-1])<querydurs[numVectors-j-1]*timesTol)){
1347 n=pointNN; 1345 n=pointNN;
1348 while(n--){ 1346 while(n--){
1349 if(thisDist>=distances[n]){ 1347 if(thisDist>=distances[n]){
1350 if((n==0 || thisDist<=distances[n-1])){ 1348 if((n==0 || thisDist<=distances[n-1])){
1351 // Copy all values above up the queue 1349 // Copy all values above up the queue
1426 if(queryCopy) 1424 if(queryCopy)
1427 delete queryCopy; 1425 delete queryCopy;
1428 if(qNorm) 1426 if(qNorm)
1429 delete qNorm; 1427 delete qNorm;
1430 if(timesdata) 1428 if(timesdata)
1431 delete timesdata; 1429 delete[] timesdata;
1430 if(querydurs)
1431 delete[] querydurs;
1432 if(dbdurs) 1432 if(dbdurs)
1433 delete dbdurs; 1433 delete dbdurs;
1434 } 1434 }
1435 1435
1436 // trackPointQuery 1436 // trackPointQuery
1484 trackSIndexes[k]=~0; 1484 trackSIndexes[k]=~0;
1485 trackIDs[k]=~0; 1485 trackIDs[k]=~0;
1486 } 1486 }
1487 1487
1488 double meanQdur = 0; 1488 double meanQdur = 0;
1489 double* timesdata = 0; 1489 double *timesdata = 0;
1490 double* meanDBdur = 0; 1490 double *querydurs = 0;
1491 double *meanDBdur = 0;
1491 1492
1492 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ 1493 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
1493 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; 1494 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
1494 usingTimes=0; 1495 usingTimes=0;
1495 } 1496 }
1496 1497
1497 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 1498 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
1498 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; 1499 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
1499 1500
1500 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ 1501 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
1501 timesdata = new double[numVectors]; 1502 timesdata = new double[2*numVectors];
1503 querydurs = new double[numVectors];
1502 insertTimeStamps(numVectors, timesFile, timesdata); 1504 insertTimeStamps(numVectors, timesFile, timesdata);
1503 // Calculate durations of points 1505 // Calculate durations of points
1504 for(k=0; k<numVectors-1; k++){ 1506 for(k=0; k<numVectors-1; k++) {
1505 timesdata[k]=timesdata[k+1]-timesdata[k]; 1507 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
1506 meanQdur+=timesdata[k]; 1508 meanQdur += querydurs[k];
1507 } 1509 }
1508 meanQdur/=k; 1510 meanQdur/=k;
1509 meanDBdur = new double[dbH->numFiles]; 1511 meanDBdur = new double[dbH->numFiles];
1510 for(k=0; k<dbH->numFiles; k++){ 1512 for(k=0; k<dbH->numFiles; k++){
1511 meanDBdur[k]=0.0; 1513 meanDBdur[k]=0.0;
1512 for(j=0; j<trackTable[k]-1 ; j++) 1514 for(j=0; j<trackTable[k]-1 ; j++) {
1513 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; 1515 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
1516 }
1514 meanDBdur[k]/=j; 1517 meanDBdur[k]/=j;
1515 } 1518 }
1516 } 1519 }
1517 1520
1518 if(usingQueryPoint) 1521 if(usingQueryPoint)
1699 adbQueryResponse->result.Spos[k]=trackSIndexes[k]; 1702 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
1700 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); 1703 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
1701 } 1704 }
1702 } 1705 }
1703 1706
1704
1705 // Clean up 1707 // Clean up
1706 if(trackOffsetTable) 1708 if(trackOffsetTable)
1707 delete trackOffsetTable; 1709 delete trackOffsetTable;
1708 if(queryCopy) 1710 if(queryCopy)
1709 delete queryCopy; 1711 delete queryCopy;
1710 if(qNorm) 1712 if(qNorm)
1711 delete qNorm; 1713 delete qNorm;
1712 if(timesdata) 1714 if(timesdata)
1713 delete timesdata; 1715 delete[] timesdata;
1716 if(querydurs)
1717 delete[] querydurs;
1714 if(meanDBdur) 1718 if(meanDBdur)
1715 delete meanDBdur; 1719 delete meanDBdur;
1716
1717 } 1720 }
1718 1721
1719 // This is a common pattern in sequence queries: what we are doing is 1722 // This is a common pattern in sequence queries: what we are doing is
1720 // taking a window of length seqlen over a buffer of length length, 1723 // taking a window of length seqlen over a buffer of length length,
1721 // and placing the sum of the elements in that window in the first 1724 // and placing the sum of the elements in that window in the first
1887 trackIDs[k]=~0; 1890 trackIDs[k]=~0;
1888 } 1891 }
1889 1892
1890 // Timestamp and durations processing 1893 // Timestamp and durations processing
1891 double meanQdur = 0; 1894 double meanQdur = 0;
1892 double* timesdata = 0; 1895 double *timesdata = 0;
1893 double* meanDBdur = 0; 1896 double *querydurs = 0;
1897 double *meanDBdur = 0;
1894 1898
1895 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ 1899 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
1896 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; 1900 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
1897 usingTimes=0; 1901 usingTimes=0;
1898 } 1902 }
1899 1903
1900 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 1904 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
1901 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; 1905 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
1902 1906
1903 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ 1907 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
1904 timesdata = new double[numVectors]; 1908 timesdata = new double[2*numVectors];
1905 assert(timesdata); 1909 querydurs = new double[numVectors];
1910
1906 insertTimeStamps(numVectors, timesFile, timesdata); 1911 insertTimeStamps(numVectors, timesFile, timesdata);
1907 // Calculate durations of points 1912 // Calculate durations of points
1908 for(k=0; k<numVectors-1; k++){ 1913 for(k=0; k<numVectors-1; k++) {
1909 timesdata[k]=timesdata[k+1]-timesdata[k]; 1914 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
1910 meanQdur+=timesdata[k]; 1915 meanQdur += querydurs[k];
1911 } 1916 }
1912 meanQdur/=k; 1917 meanQdur/=k;
1913 if(verbosity>1) { 1918 if(verbosity>1) {
1914 cerr << "mean query file duration: " << meanQdur << endl; 1919 cerr << "mean query file duration: " << meanQdur << endl;
1915 } 1920 }
1916 meanDBdur = new double[dbH->numFiles]; 1921 meanDBdur = new double[dbH->numFiles];
1917 assert(meanDBdur); 1922 assert(meanDBdur);
1918 for(k=0; k<dbH->numFiles; k++){ 1923 for(k=0; k<dbH->numFiles; k++){
1919 meanDBdur[k]=0.0; 1924 meanDBdur[k]=0.0;
1920 for(j=0; j<trackTable[k]-1 ; j++) 1925 for(j=0; j<trackTable[k]-1 ; j++) {
1921 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; 1926 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
1927 }
1922 meanDBdur[k]/=j; 1928 meanDBdur[k]/=j;
1923 } 1929 }
1924 } 1930 }
1925 1931
1926 if(usingQueryPoint) 1932 if(usingQueryPoint)
2244 delete[] D; 2250 delete[] D;
2245 if(DD) 2251 if(DD)
2246 delete[] DD; 2252 delete[] DD;
2247 if(timesdata) 2253 if(timesdata)
2248 delete[] timesdata; 2254 delete[] timesdata;
2255 if(querydurs)
2256 delete[] querydurs;
2249 if(meanDBdur) 2257 if(meanDBdur)
2250 delete[] meanDBdur; 2258 delete[] meanDBdur;
2251 } 2259 }
2252 2260
2253 // Radius search between query and target tracks 2261 // Radius search between query and target tracks
2376 trackIDs[k]=~0; 2384 trackIDs[k]=~0;
2377 } 2385 }
2378 2386
2379 // Timestamp and durations processing 2387 // Timestamp and durations processing
2380 double meanQdur = 0; 2388 double meanQdur = 0;
2381 double* timesdata = 0; 2389 double *timesdata = 0;
2382 double* meanDBdur = 0; 2390 double *querydurs = 0;
2391 double *meanDBdur = 0;
2383 2392
2384 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ 2393 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
2385 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; 2394 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
2386 usingTimes=0; 2395 usingTimes=0;
2387 } 2396 }
2388 2397
2389 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 2398 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
2390 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; 2399 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
2391 2400
2392 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ 2401 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
2393 timesdata = new double[numVectors]; 2402 timesdata = new double[2*numVectors];
2394 assert(timesdata); 2403 querydurs = new double[numVectors];
2404
2395 insertTimeStamps(numVectors, timesFile, timesdata); 2405 insertTimeStamps(numVectors, timesFile, timesdata);
2396 // Calculate durations of points 2406 // Calculate durations of points
2397 for(k=0; k<numVectors-1; k++){ 2407 for(k=0; k<numVectors-1; k++){
2398 timesdata[k]=timesdata[k+1]-timesdata[k]; 2408 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
2399 meanQdur+=timesdata[k]; 2409 meanQdur += querydurs[k];
2400 } 2410 }
2401 meanQdur/=k; 2411 meanQdur/=k;
2402 if(verbosity>1) { 2412 if(verbosity>1) {
2403 cerr << "mean query file duration: " << meanQdur << endl; 2413 cerr << "mean query file duration: " << meanQdur << endl;
2404 } 2414 }
2405 meanDBdur = new double[dbH->numFiles]; 2415 meanDBdur = new double[dbH->numFiles];
2406 assert(meanDBdur); 2416 assert(meanDBdur);
2407 for(k=0; k<dbH->numFiles; k++){ 2417 for(k=0; k<dbH->numFiles; k++){
2408 meanDBdur[k]=0.0; 2418 meanDBdur[k]=0.0;
2409 for(j=0; j<trackTable[k]-1 ; j++) 2419 for(j=0; j<trackTable[k]-1 ; j++) {
2410 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; 2420 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
2421 }
2411 meanDBdur[k]/=j; 2422 meanDBdur[k]/=j;
2412 } 2423 }
2413 } 2424 }
2414 2425
2415 if(usingQueryPoint) 2426 if(usingQueryPoint)
2711 delete[] D; 2722 delete[] D;
2712 if(DD) 2723 if(DD)
2713 delete[] DD; 2724 delete[] DD;
2714 if(timesdata) 2725 if(timesdata)
2715 delete[] timesdata; 2726 delete[] timesdata;
2727 if(querydurs)
2728 delete[] querydurs;
2716 if(meanDBdur) 2729 if(meanDBdur)
2717 delete[] meanDBdur; 2730 delete[] meanDBdur;
2718 } 2731 }
2719 2732
2720 // Unit norm block of features 2733 // Unit norm block of features