Mercurial > hg > audiodb
comparison audioDB.cpp @ 193:f9d16137e704
Merge powertable branch -r168:227 to trunk.
author | mas01cr |
---|---|
date | Wed, 21 Nov 2007 11:35:44 +0000 |
parents | cdd441dcc9a8 |
children | 0e75deb7d4d1 |
comparison
equal
deleted
inserted
replaced
170:10bcea4e5c40 | 193:f9d16137e704 |
---|---|
70 status(dbName); | 70 status(dbName); |
71 | 71 |
72 else if(O2_ACTION(COM_L2NORM)) | 72 else if(O2_ACTION(COM_L2NORM)) |
73 l2norm(dbName); | 73 l2norm(dbName); |
74 | 74 |
75 else if(O2_ACTION(COM_POWER)) | |
76 power_flag(dbName); | |
77 | |
75 else if(O2_ACTION(COM_DUMP)) | 78 else if(O2_ACTION(COM_DUMP)) |
76 dump(dbName); | 79 dump(dbName); |
77 | 80 |
78 else | 81 else |
79 error("Unrecognized command",command); | 82 error("Unrecognized command",command); |
218 command=COM_L2NORM; | 221 command=COM_L2NORM; |
219 dbName=args_info.database_arg; | 222 dbName=args_info.database_arg; |
220 return 0; | 223 return 0; |
221 } | 224 } |
222 | 225 |
226 if(args_info.POWER_given){ | |
227 command=COM_POWER; | |
228 dbName=args_info.database_arg; | |
229 return 0; | |
230 } | |
231 | |
223 if(args_info.INSERT_given){ | 232 if(args_info.INSERT_given){ |
224 command=COM_INSERT; | 233 command=COM_INSERT; |
225 dbName=args_info.database_arg; | 234 dbName=args_info.database_arg; |
226 inFile=args_info.features_arg; | 235 inFile=args_info.features_arg; |
227 if(args_info.key_given) | 236 if(args_info.key_given) |
232 if(!(timesFile = new ifstream(timesFileName,ios::in))) | 241 if(!(timesFile = new ifstream(timesFileName,ios::in))) |
233 error("Could not open times file for reading", timesFileName); | 242 error("Could not open times file for reading", timesFileName); |
234 usingTimes=1; | 243 usingTimes=1; |
235 } | 244 } |
236 } | 245 } |
246 if (args_info.power_given) { | |
247 powerFileName = args_info.power_arg; | |
248 if (strlen(powerFileName) > 0) { | |
249 if (!(powerfd = open(powerFileName, O_RDONLY))) { | |
250 error("Could not open power file for reading", powerFileName, "open"); | |
251 } | |
252 usingPower = 1; | |
253 } | |
254 } | |
237 return 0; | 255 return 0; |
238 } | 256 } |
239 | 257 |
240 if(args_info.BATCHINSERT_given){ | 258 if(args_info.BATCHINSERT_given){ |
241 command=COM_BATCHINSERT; | 259 command=COM_BATCHINSERT; |
257 timesFileName=args_info.timesList_arg; | 275 timesFileName=args_info.timesList_arg; |
258 if(strlen(timesFileName)>0){ | 276 if(strlen(timesFileName)>0){ |
259 if(!(timesFile = new ifstream(timesFileName,ios::in))) | 277 if(!(timesFile = new ifstream(timesFileName,ios::in))) |
260 error("Could not open timesList file for reading", timesFileName); | 278 error("Could not open timesList file for reading", timesFileName); |
261 usingTimes=1; | 279 usingTimes=1; |
280 } | |
281 } | |
282 if(args_info.powerList_given){ | |
283 powerFileName=args_info.powerList_arg; | |
284 if(strlen(powerFileName)>0){ | |
285 if(!(powerFile = new ifstream(powerFileName,ios::in))) | |
286 error("Could not open powerList file for reading", powerFileName); | |
287 usingPower=1; | |
262 } | 288 } |
263 } | 289 } |
264 return 0; | 290 return 0; |
265 } | 291 } |
266 | 292 |
280 timesFileName=args_info.times_arg; | 306 timesFileName=args_info.times_arg; |
281 if(strlen(timesFileName)>0){ | 307 if(strlen(timesFileName)>0){ |
282 if(!(timesFile = new ifstream(timesFileName,ios::in))) | 308 if(!(timesFile = new ifstream(timesFileName,ios::in))) |
283 error("Could not open times file for reading", timesFileName); | 309 error("Could not open times file for reading", timesFileName); |
284 usingTimes=1; | 310 usingTimes=1; |
311 } | |
312 } | |
313 | |
314 if(args_info.power_given){ | |
315 powerFileName=args_info.power_arg; | |
316 if(strlen(powerFileName)>0){ | |
317 if (!(powerfd = open(powerFileName, O_RDONLY))) { | |
318 error("Could not open power file for reading", powerFileName, "open"); | |
319 } | |
320 usingPower = 1; | |
285 } | 321 } |
286 } | 322 } |
287 | 323 |
288 // query type | 324 // query type |
289 if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0) | 325 if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0) |
316 } | 352 } |
317 sequenceHop = args_info.sequencehop_arg; | 353 sequenceHop = args_info.sequencehop_arg; |
318 if(sequenceHop < 1 || sequenceHop > 1000) { | 354 if(sequenceHop < 1 || sequenceHop > 1000) { |
319 error("seqhop out of range: 1 <= seqhop <= 1000"); | 355 error("seqhop out of range: 1 <= seqhop <= 1000"); |
320 } | 356 } |
357 | |
358 if (args_info.absolute_threshold_given) { | |
359 if (args_info.absolute_threshold_arg >= 0) { | |
360 error("absolute threshold out of range: should be negative"); | |
361 } | |
362 use_absolute_threshold = true; | |
363 absolute_threshold = args_info.absolute_threshold_arg; | |
364 } | |
365 if (args_info.relative_threshold_given) { | |
366 use_relative_threshold = true; | |
367 relative_threshold = args_info.relative_threshold_arg; | |
368 } | |
321 return 0; | 369 return 0; |
322 } | 370 } |
323 return -1; // no command found | 371 return -1; // no command found |
324 } | 372 } |
325 | 373 |
410 dbH->fileTableOffset = ALIGN_UP(O2_HEADERSIZE, 8); | 458 dbH->fileTableOffset = ALIGN_UP(O2_HEADERSIZE, 8); |
411 dbH->trackTableOffset = ALIGN_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles, 8); | 459 dbH->trackTableOffset = ALIGN_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles, 8); |
412 dbH->dataOffset = ALIGN_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles, 8); | 460 dbH->dataOffset = ALIGN_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles, 8); |
413 dbH->l2normTableOffset = ALIGN_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8); | 461 dbH->l2normTableOffset = ALIGN_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8); |
414 dbH->timesTableOffset = ALIGN_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8); | 462 dbH->timesTableOffset = ALIGN_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8); |
463 dbH->powerTableOffset = ALIGN_DOWN(dbH->timesTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8); | |
415 dbH->dbSize = size; | 464 dbH->dbSize = size; |
416 | 465 |
417 memcpy (db, dbH, O2_HEADERSIZE); | 466 memcpy (db, dbH, O2_HEADERSIZE); |
418 if(verbosity) { | 467 if(verbosity) { |
419 cerr << COM_CREATE << " " << dbName << endl; | 468 cerr << COM_CREATE << " " << dbName << endl; |
468 fileTable = (char *) (db + dbH->fileTableOffset); | 517 fileTable = (char *) (db + dbH->fileTableOffset); |
469 trackTable = (unsigned *) (db + dbH->trackTableOffset); | 518 trackTable = (unsigned *) (db + dbH->trackTableOffset); |
470 dataBuf = (double *) (db + dbH->dataOffset); | 519 dataBuf = (double *) (db + dbH->dataOffset); |
471 l2normTable = (double *) (db + dbH->l2normTableOffset); | 520 l2normTable = (double *) (db + dbH->l2normTableOffset); |
472 timesTable = (double *) (db + dbH->timesTableOffset); | 521 timesTable = (double *) (db + dbH->timesTableOffset); |
522 powerTable = (double *) (db + dbH->powerTableOffset); | |
473 } | 523 } |
474 | 524 |
475 void audioDB::initInputFile (const char *inFile) { | 525 void audioDB::initInputFile (const char *inFile) { |
476 if (inFile) { | 526 if (inFile) { |
477 if ((infid = open(inFile, O_RDONLY)) < 0) { | 527 if ((infid = open(inFile, O_RDONLY)) < 0) { |
520 | 570 |
521 initTables(dbName, 1, inFile); | 571 initTables(dbName, 1, inFile); |
522 | 572 |
523 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | 573 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) |
524 error("Must use timestamps with timestamped database","use --times"); | 574 error("Must use timestamps with timestamped database","use --times"); |
575 | |
576 if(!usingPower && (dbH->flags & O2_FLAG_POWER)) | |
577 error("Must use power with power-enabled database", dbName); | |
525 | 578 |
526 // Check that there is room for at least 1 more file | 579 // Check that there is room for at least 1 more file |
527 if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int))) | 580 if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int))) |
528 error("Insert failed: no more room in database", inFile); | 581 error("Insert failed: no more room in database", inFile); |
529 | 582 |
564 // Check times status and insert times from file | 617 // Check times status and insert times from file |
565 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); | 618 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); |
566 double* timesdata=timesTable+timesoffset; | 619 double* timesdata=timesTable+timesoffset; |
567 assert(timesdata+numVectors<l2normTable); | 620 assert(timesdata+numVectors<l2normTable); |
568 insertTimeStamps(numVectors, timesFile, timesdata); | 621 insertTimeStamps(numVectors, timesFile, timesdata); |
622 | |
623 double *powerdata = powerTable + timesoffset; | |
624 insertPowerData(numVectors, powerfd, powerdata); | |
569 | 625 |
570 // Increment file count | 626 // Increment file count |
571 dbH->numFiles++; | 627 dbH->numFiles++; |
572 | 628 |
573 // Update Header information | 629 // Update Header information |
650 } | 706 } |
651 } | 707 } |
652 } | 708 } |
653 } | 709 } |
654 | 710 |
711 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) { | |
712 if (usingPower) { | |
713 if (!(dbH->flags & O2_FLAG_POWER)) { | |
714 error("Cannot insert power data on non-power DB", dbName); | |
715 } | |
716 | |
717 int one; | |
718 unsigned int count; | |
719 | |
720 count = read(powerfd, &one, sizeof(unsigned int)); | |
721 if (count != sizeof(unsigned int)) { | |
722 error("powerfd read failed", "int", "read"); | |
723 } | |
724 if (one != 1) { | |
725 error("dimensionality of power file not 1", powerFileName); | |
726 } | |
727 | |
728 // FIXME: should check that the powerfile is the right size for | |
729 // this. -- CSR, 2007-10-30 | |
730 count = read(powerfd, powerdata, numVectors * sizeof(double)); | |
731 if (count != numVectors * sizeof(double)) { | |
732 error("powerfd read failed", "double", "read"); | |
733 } | |
734 } | |
735 } | |
736 | |
655 void audioDB::batchinsert(const char* dbName, const char* inFile) { | 737 void audioDB::batchinsert(const char* dbName, const char* inFile) { |
656 | 738 |
657 initDBHeader(dbName, true); | 739 initDBHeader(dbName, true); |
658 | 740 |
659 if(!key) | 741 if(!key) |
660 key=inFile; | 742 key=inFile; |
661 ifstream *filesIn = 0; | 743 ifstream *filesIn = 0; |
662 ifstream *keysIn = 0; | 744 ifstream *keysIn = 0; |
663 ifstream* thisTimesFile = 0; | 745 ifstream* thisTimesFile = 0; |
746 int thispowerfd = 0; | |
664 | 747 |
665 if(!(filesIn = new ifstream(inFile))) | 748 if(!(filesIn = new ifstream(inFile))) |
666 error("Could not open batch in file", inFile); | 749 error("Could not open batch in file", inFile); |
667 if(key && key!=inFile) | 750 if(key && key!=inFile) |
668 if(!(keysIn = new ifstream(key))) | 751 if(!(keysIn = new ifstream(key))) |
669 error("Could not open batch key file",key); | 752 error("Could not open batch key file",key); |
670 | 753 |
671 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | 754 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) |
672 error("Must use timestamps with timestamped database","use --times"); | 755 error("Must use timestamps with timestamped database","use --times"); |
673 | 756 |
757 if(!usingPower && (dbH->flags & O2_FLAG_POWER)) | |
758 error("Must use power with power-enabled database", dbName); | |
759 | |
674 unsigned totalVectors=0; | 760 unsigned totalVectors=0; |
675 char *thisKey = new char[MAXSTR]; | 761 char *thisKey = new char[MAXSTR]; |
676 char *thisFile = new char[MAXSTR]; | 762 char *thisFile = new char[MAXSTR]; |
677 char *thisTimesFileName = new char[MAXSTR]; | 763 char *thisTimesFileName = new char[MAXSTR]; |
764 char *thisPowerFileName = new char[MAXSTR]; | |
678 | 765 |
679 do{ | 766 do{ |
680 filesIn->getline(thisFile,MAXSTR); | 767 filesIn->getline(thisFile,MAXSTR); |
681 if(key && key!=inFile) | 768 if(key && key!=inFile) |
682 keysIn->getline(thisKey,MAXSTR); | 769 keysIn->getline(thisKey,MAXSTR); |
683 else | 770 else |
684 thisKey = thisFile; | 771 thisKey = thisFile; |
685 if(usingTimes) | 772 if(usingTimes) |
686 timesFile->getline(thisTimesFileName,MAXSTR); | 773 timesFile->getline(thisTimesFileName,MAXSTR); |
774 if(usingPower) | |
775 powerFile->getline(thisPowerFileName, MAXSTR); | |
687 | 776 |
688 if(filesIn->eof()) | 777 if(filesIn->eof()) |
689 break; | 778 break; |
690 | 779 |
691 initInputFile(thisFile); | 780 initInputFile(thisFile); |
715 if(!numVectors){ | 804 if(!numVectors){ |
716 if(verbosity) { | 805 if(verbosity) { |
717 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; | 806 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; |
718 } | 807 } |
719 } | 808 } |
720 else{ | 809 else{ |
721 if(usingTimes){ | 810 if(usingTimes){ |
722 if(timesFile->eof()) | 811 if(timesFile->eof()) |
723 error("not enough timestamp files in timesList"); | 812 error("not enough timestamp files in timesList"); |
724 thisTimesFile=new ifstream(thisTimesFileName,ios::in); | 813 thisTimesFile=new ifstream(thisTimesFileName,ios::in); |
725 if(!thisTimesFile->is_open()) | 814 if(!thisTimesFile->is_open()) |
730 assert(timesdata+numVectors<l2normTable); | 819 assert(timesdata+numVectors<l2normTable); |
731 insertTimeStamps(numVectors,thisTimesFile,timesdata); | 820 insertTimeStamps(numVectors,thisTimesFile,timesdata); |
732 if(thisTimesFile) | 821 if(thisTimesFile) |
733 delete thisTimesFile; | 822 delete thisTimesFile; |
734 } | 823 } |
735 | 824 |
825 if (usingPower) { | |
826 if(powerFile->eof()) { | |
827 error("not enough power files in powerList", powerFileName); | |
828 } | |
829 thispowerfd = open(thisPowerFileName, O_RDONLY); | |
830 if (thispowerfd < 0) { | |
831 error("failed to open power file", thisPowerFileName); | |
832 } | |
833 unsigned insertoffset = dbH->length; | |
834 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double)); | |
835 double *powerdata = powerTable + poweroffset; | |
836 insertPowerData(numVectors, thispowerfd, powerdata); | |
837 if (0 < thispowerfd) { | |
838 close(thispowerfd); | |
839 } | |
840 } | |
736 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); | 841 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); |
737 | 842 |
738 unsigned insertoffset = dbH->length;// Store current state | 843 unsigned insertoffset = dbH->length;// Store current state |
739 | 844 |
740 // Increment file count | 845 // Increment file count |
877 | 982 |
878 if((chdir(output)) < 0) { | 983 if((chdir(output)) < 0) { |
879 error("error changing working directory", output, "chdir"); | 984 error("error changing working directory", output, "chdir"); |
880 } | 985 } |
881 | 986 |
882 int fLfd, tLfd = 0, kLfd; | 987 int fLfd, tLfd = 0, pLfd = 0, kLfd; |
883 FILE *fLFile, *tLFile = 0, *kLFile; | 988 FILE *fLFile, *tLFile = 0, *pLFile = 0, *kLFile; |
884 | 989 |
885 if ((fLfd = open("featureList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | 990 if ((fLfd = open("featureList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { |
886 error("error creating featureList file", "featureList.txt", "open"); | 991 error("error creating featureList file", "featureList.txt", "open"); |
887 } | 992 } |
993 | |
888 int times = dbH->flags & O2_FLAG_TIMES; | 994 int times = dbH->flags & O2_FLAG_TIMES; |
889 if (times) { | 995 if (times) { |
890 if ((tLfd = open("timesList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | 996 if ((tLfd = open("timesList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { |
891 error("error creating timesList file", "timesList.txt", "open"); | 997 error("error creating timesList file", "timesList.txt", "open"); |
892 } | 998 } |
893 } | 999 } |
1000 | |
1001 int power = dbH->flags & O2_FLAG_POWER; | |
1002 if (power) { | |
1003 if ((pLfd = open("powerList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | |
1004 error("error creating powerList file", "powerList.txt", "open"); | |
1005 } | |
1006 } | |
1007 | |
894 if ((kLfd = open("keyList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | 1008 if ((kLfd = open("keyList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { |
895 error("error creating keyList file", "keyList.txt", "open"); | 1009 error("error creating keyList file", "keyList.txt", "open"); |
896 } | 1010 } |
897 | 1011 |
898 /* can these fail? I sincerely hope not. */ | 1012 /* can these fail? I sincerely hope not. */ |
899 fLFile = fdopen(fLfd, "w"); | 1013 fLFile = fdopen(fLfd, "w"); |
900 if (times) { | 1014 if (times) { |
901 tLFile = fdopen(tLfd, "w"); | 1015 tLFile = fdopen(tLfd, "w"); |
902 } | 1016 } |
1017 if (power) { | |
1018 pLFile = fdopen(pLfd, "w"); | |
1019 } | |
903 kLFile = fdopen(kLfd, "w"); | 1020 kLFile = fdopen(kLfd, "w"); |
904 | 1021 |
905 char *fName = new char[256]; | 1022 char *fName = new char[256]; |
906 int ffd; | 1023 int ffd, pfd; |
907 FILE *tFile; | 1024 FILE *tFile; |
908 unsigned pos = 0; | 1025 unsigned pos = 0; |
909 for(unsigned k = 0; k < dbH->numFiles; k++) { | 1026 for(unsigned k = 0; k < dbH->numFiles; k++) { |
910 fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLESIZE); | 1027 fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLESIZE); |
911 snprintf(fName, 256, "%05d.features", k); | 1028 snprintf(fName, 256, "%05d.features", k); |
934 // -- CSR, 2007-10-19 | 1051 // -- CSR, 2007-10-19 |
935 fprintf(tFile, "%.16e\n", *(timesTable + pos + i)); | 1052 fprintf(tFile, "%.16e\n", *(timesTable + pos + i)); |
936 } | 1053 } |
937 fprintf(tLFile, "%s\n", fName); | 1054 fprintf(tLFile, "%s\n", fName); |
938 } | 1055 } |
1056 | |
1057 if (power) { | |
1058 uint32_t one = 1; | |
1059 snprintf(fName, 256, "%05d.power", k); | |
1060 if ((pfd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | |
1061 error("error creating power file", fName, "open"); | |
1062 } | |
1063 if ((write(pfd, &one, sizeof(uint32_t))) < 0) { | |
1064 error("error writing one", fName, "write"); | |
1065 } | |
1066 if ((write(pfd, powerTable + pos, trackTable[k] * sizeof(double))) < 0) { | |
1067 error("error writing data", fName, "write"); | |
1068 } | |
1069 fprintf(pLFile, "%s\n", fName); | |
1070 close(pfd); | |
1071 } | |
939 | 1072 |
940 pos += trackTable[k]; | 1073 pos += trackTable[k]; |
941 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; | 1074 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; |
942 } | 1075 } |
943 | 1076 |
952 if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\ | 1085 if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\ |
953 \"${AUDIODB}\" -d \"$1\" -N --size=%d\n", dbH->dbSize / 1000000); | 1086 \"${AUDIODB}\" -d \"$1\" -N --size=%d\n", dbH->dbSize / 1000000); |
954 if(dbH->flags & O2_FLAG_L2NORM) { | 1087 if(dbH->flags & O2_FLAG_L2NORM) { |
955 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n"); | 1088 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n"); |
956 } | 1089 } |
1090 if(power) { | |
1091 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -P\n"); | |
1092 } | |
957 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt"); | 1093 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt"); |
958 if(times) { | 1094 if(times) { |
959 fprintf(scriptFile, " -T timesList.txt"); | 1095 fprintf(scriptFile, " -T timesList.txt"); |
960 } | 1096 } |
1097 if(power) { | |
1098 fprintf(scriptFile, " -W powerList.txt"); | |
1099 } | |
961 fprintf(scriptFile, "\n"); | 1100 fprintf(scriptFile, "\n"); |
962 fclose(scriptFile); | 1101 fclose(scriptFile); |
963 | 1102 |
964 if((chdir(cwd)) < 0) { | 1103 if((chdir(cwd)) < 0) { |
965 error("error changing working directory", cwd, "chdir"); | 1104 error("error changing working directory", cwd, "chdir"); |
966 } | 1105 } |
967 | 1106 |
968 fclose(fLFile); | 1107 fclose(fLFile); |
969 if(times) { | 1108 if(times) { |
970 fclose(tLFile); | 1109 fclose(tLFile); |
1110 } | |
1111 if(power) { | |
1112 fclose(pLFile); | |
971 } | 1113 } |
972 fclose(kLFile); | 1114 fclose(kLFile); |
973 delete[] fName; | 1115 delete[] fName; |
974 | 1116 |
975 status(dbName); | 1117 status(dbName); |
983 } | 1125 } |
984 // Update database flags | 1126 // Update database flags |
985 dbH->flags = dbH->flags|O2_FLAG_L2NORM; | 1127 dbH->flags = dbH->flags|O2_FLAG_L2NORM; |
986 memcpy (db, dbH, O2_HEADERSIZE); | 1128 memcpy (db, dbH, O2_HEADERSIZE); |
987 } | 1129 } |
988 | 1130 |
1131 void audioDB::power_flag(const char *dbName) { | |
1132 initTables(dbName, true, 0); | |
1133 if (dbH->length > 0) { | |
1134 error("cannot turn on power storage for non-empty database", dbName); | |
1135 } | |
1136 dbH->flags |= O2_FLAG_POWER; | |
1137 memcpy(db, dbH, O2_HEADERSIZE); | |
1138 } | |
1139 | |
1140 bool audioDB::powers_acceptable(double p1, double p2) { | |
1141 if (use_absolute_threshold) { | |
1142 if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) { | |
1143 return false; | |
1144 } | |
1145 } | |
1146 if (use_relative_threshold) { | |
1147 if (fabs(p1-p2) > fabs(relative_threshold)) { | |
1148 return false; | |
1149 } | |
1150 } | |
1151 return true; | |
1152 } | |
989 | 1153 |
990 | 1154 |
991 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ | 1155 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ |
992 switch(queryType){ | 1156 switch(queryType){ |
993 case O2_POINT_QUERY: | 1157 case O2_POINT_QUERY: |
1448 if(meanDBdur) | 1612 if(meanDBdur) |
1449 delete meanDBdur; | 1613 delete meanDBdur; |
1450 | 1614 |
1451 } | 1615 } |
1452 | 1616 |
1617 // This is a common pattern in sequence queries: what we are doing is | |
1618 // taking a window of length seqlen over a buffer of length length, | |
1619 // and placing the sum of the elements in that window in the first | |
1620 // element of the window: thus replacing all but the last seqlen | |
1621 // elements in the buffer the corresponding windowed sum. | |
1622 void audioDB::sequence_sum(double *buffer, int length, int seqlen) { | |
1623 double tmp1, tmp2, *ps; | |
1624 int j, w; | |
1625 | |
1626 tmp1 = *buffer; | |
1627 j = 1; | |
1628 w = seqlen - 1; | |
1629 while(w--) { | |
1630 *buffer += buffer[j++]; | |
1631 } | |
1632 ps = buffer + 1; | |
1633 w = length - seqlen; // +1 - 1 | |
1634 while(w--) { | |
1635 tmp2 = *ps; | |
1636 *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1); | |
1637 tmp1 = tmp2; | |
1638 ps++; | |
1639 } | |
1640 } | |
1641 | |
1642 void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) { | |
1643 int w = length - seqlen + 1; | |
1644 while(w--) { | |
1645 *buffer = sqrt(*buffer); | |
1646 buffer++; | |
1647 } | |
1648 } | |
1649 | |
1650 void audioDB::sequence_average(double *buffer, int length, int seqlen) { | |
1651 int w = length - seqlen + 1; | |
1652 while(w--) { | |
1653 *buffer /= seqlen; | |
1654 buffer++; | |
1655 } | |
1656 } | |
1453 | 1657 |
1454 // k nearest-neighbor (k-NN) search between query and target tracks | 1658 // k nearest-neighbor (k-NN) search between query and target tracks |
1455 // efficient implementation based on matched filter | 1659 // efficient implementation based on matched filter |
1456 // assumes normed shingles | 1660 // assumes normed shingles |
1457 // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track | 1661 // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track |
1463 // we use stdout in this stub version | 1667 // we use stdout in this stub version |
1464 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 1668 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); |
1465 double* query = (double*)(indata+sizeof(int)); | 1669 double* query = (double*)(indata+sizeof(int)); |
1466 double* queryCopy = 0; | 1670 double* queryCopy = 0; |
1467 | 1671 |
1468 double qMeanL2; | |
1469 double* sMeanL2; | |
1470 | |
1471 unsigned USE_THRESH=0; | |
1472 double SILENCE_THRESH=0; | |
1473 double DIFF_THRESH=0; | |
1474 | |
1475 if(!(dbH->flags & O2_FLAG_L2NORM) ) | 1672 if(!(dbH->flags & O2_FLAG_L2NORM) ) |
1476 error("Database must be L2 normed for sequence query","use -L2NORM"); | 1673 error("Database must be L2 normed for sequence query","use -L2NORM"); |
1477 | 1674 |
1478 if(numVectors<sequenceLength) | 1675 if(numVectors<sequenceLength) |
1479 error("Query shorter than requested sequence length", "maybe use -l"); | 1676 error("Query shorter than requested sequence length", "maybe use -l"); |
1486 // Make a copy of the query | 1683 // Make a copy of the query |
1487 queryCopy = new double[numVectors*dbH->dim]; | 1684 queryCopy = new double[numVectors*dbH->dim]; |
1488 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | 1685 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); |
1489 qNorm = new double[numVectors]; | 1686 qNorm = new double[numVectors]; |
1490 sNorm = new double[dbVectors]; | 1687 sNorm = new double[dbVectors]; |
1491 sMeanL2=new double[dbH->numFiles]; | 1688 assert(qNorm&&sNorm&&queryCopy&&sequenceLength); |
1492 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); | |
1493 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | 1689 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); |
1494 query = queryCopy; | 1690 query = queryCopy; |
1495 | 1691 |
1496 // Make norm measurements relative to sequenceLength | 1692 // Make norm measurements relative to sequenceLength |
1497 unsigned w = sequenceLength-1; | 1693 unsigned w = sequenceLength-1; |
1498 unsigned i,j; | 1694 unsigned i,j; |
1499 double* ps; | |
1500 double tmp1,tmp2; | |
1501 | 1695 |
1502 // Copy the L2 norm values to core to avoid disk random access later on | 1696 // Copy the L2 norm values to core to avoid disk random access later on |
1503 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); | 1697 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); |
1504 double* qnPtr = qNorm; | 1698 double* qnPtr = qNorm; |
1505 double* snPtr = sNorm; | 1699 double* snPtr = sNorm; |
1700 | |
1701 double *sPower = 0, *qPower = 0; | |
1702 double *spPtr = 0, *qpPtr = 0; | |
1703 | |
1704 if (usingPower) { | |
1705 if (!(dbH->flags & O2_FLAG_POWER)) { | |
1706 error("database not power-enabled", dbName); | |
1707 } | |
1708 sPower = new double[dbVectors]; | |
1709 spPtr = sPower; | |
1710 memcpy(sPower, powerTable, dbVectors * sizeof(double)); | |
1711 } | |
1712 | |
1506 for(i=0; i<dbH->numFiles; i++){ | 1713 for(i=0; i<dbH->numFiles; i++){ |
1507 if(trackTable[i]>=sequenceLength){ | 1714 if(trackTable[i]>=sequenceLength) { |
1508 tmp1=*snPtr; | 1715 sequence_sum(snPtr, trackTable[i], sequenceLength); |
1509 j=1; | 1716 sequence_sqrt(snPtr, trackTable[i], sequenceLength); |
1510 w=sequenceLength-1; | 1717 |
1511 while(w--) | 1718 if (usingPower) { |
1512 *snPtr+=snPtr[j++]; | 1719 sequence_sum(spPtr, trackTable[i], sequenceLength); |
1513 ps = snPtr+1; | 1720 sequence_average(spPtr, trackTable[i], sequenceLength); |
1514 w=trackTable[i]-sequenceLength; // +1 - 1 | 1721 } |
1515 while(w--){ | 1722 } |
1516 tmp2=*ps; | 1723 snPtr += trackTable[i]; |
1517 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); | 1724 if (usingPower) { |
1518 tmp1=tmp2; | 1725 spPtr += trackTable[i]; |
1519 ps++; | 1726 } |
1520 } | 1727 } |
1521 ps = snPtr; | 1728 |
1522 w=trackTable[i]-sequenceLength+1; | 1729 sequence_sum(qnPtr, numVectors, sequenceLength); |
1523 while(w--){ | 1730 sequence_sqrt(qnPtr, numVectors, sequenceLength); |
1524 *ps=sqrt(*ps); | 1731 |
1525 ps++; | 1732 if (usingPower) { |
1526 } | 1733 qPower = new double[numVectors]; |
1527 } | 1734 qpPtr = qPower; |
1528 snPtr+=trackTable[i]; | 1735 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { |
1529 } | 1736 error("error seeking to data", powerFileName, "lseek"); |
1530 | 1737 } |
1531 double* pn = sMeanL2; | 1738 int count = read(powerfd, qPower, numVectors * sizeof(double)); |
1532 w=dbH->numFiles; | 1739 if (count == -1) { |
1533 while(w--) | 1740 error("error reading data", powerFileName, "read"); |
1534 *pn++=0.0; | 1741 } |
1535 ps=sNorm; | 1742 if ((unsigned) count != numVectors * sizeof(double)) { |
1536 unsigned processedTracks=0; | 1743 error("short read", powerFileName); |
1537 for(i=0; i<dbH->numFiles; i++){ | 1744 } |
1538 if(trackTable[i]>sequenceLength-1){ | 1745 |
1539 w = trackTable[i]-sequenceLength+1; | 1746 sequence_sum(qpPtr, numVectors, sequenceLength); |
1540 pn = sMeanL2+i; | 1747 sequence_average(qpPtr, numVectors, sequenceLength); |
1541 *pn=0; | 1748 } |
1542 while(w--) | |
1543 if(*ps>0) | |
1544 *pn+=*ps++; | |
1545 *pn/=trackTable[i]-sequenceLength+1; | |
1546 SILENCE_THRESH+=*pn; | |
1547 processedTracks++; | |
1548 } | |
1549 ps = sNorm + trackTable[i]; | |
1550 } | |
1551 if(verbosity>1) { | |
1552 cerr << "processedTracks: " << processedTracks << endl; | |
1553 } | |
1554 | |
1555 SILENCE_THRESH/=processedTracks; | |
1556 USE_THRESH=1; // Turn thresholding on | |
1557 DIFF_THRESH=SILENCE_THRESH; // mean shingle power | |
1558 SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE | |
1559 if(verbosity>4) { | |
1560 cerr << "silence thresh: " << SILENCE_THRESH; | |
1561 } | |
1562 w=sequenceLength-1; | |
1563 i=1; | |
1564 tmp1=*qnPtr; | |
1565 while(w--) | |
1566 *qnPtr+=qnPtr[i++]; | |
1567 ps = qnPtr+1; | |
1568 w=numVectors-sequenceLength; // +1 -1 | |
1569 while(w--){ | |
1570 tmp2=*ps; | |
1571 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); | |
1572 tmp1=tmp2; | |
1573 ps++; | |
1574 } | |
1575 ps = qnPtr; | |
1576 qMeanL2 = 0; | |
1577 w=numVectors-sequenceLength+1; | |
1578 while(w--){ | |
1579 *ps=sqrt(*ps); | |
1580 qMeanL2+=*ps++; | |
1581 } | |
1582 qMeanL2 /= numVectors-sequenceLength+1; | |
1583 | 1749 |
1584 if(verbosity>1) { | 1750 if(verbosity>1) { |
1585 cerr << "done." << endl; | 1751 cerr << "done." << endl; |
1586 } | 1752 } |
1587 | 1753 |
1660 error("queryPoint > numVectors-wL+1 in query"); | 1826 error("queryPoint > numVectors-wL+1 in query"); |
1661 else{ | 1827 else{ |
1662 if(verbosity>1) { | 1828 if(verbosity>1) { |
1663 cerr << "query point: " << queryPoint << endl; cerr.flush(); | 1829 cerr << "query point: " << queryPoint << endl; cerr.flush(); |
1664 } | 1830 } |
1665 query=query+queryPoint*dbH->dim; | 1831 query = query + queryPoint * dbH->dim; |
1666 qnPtr=qnPtr+queryPoint; | 1832 qnPtr = qnPtr + queryPoint; |
1833 if (usingPower) { | |
1834 qpPtr = qpPtr + queryPoint; | |
1835 } | |
1667 numVectors=wL; | 1836 numVectors=wL; |
1668 } | 1837 } |
1669 | 1838 |
1670 double ** D = 0; // Differences query and target | 1839 double ** D = 0; // Differences query and target |
1671 double ** DD = 0; // Matched filter distance | 1840 double ** DD = 0; // Matched filter distance |
1674 assert(D); | 1843 assert(D); |
1675 DD = new double*[numVectors]; | 1844 DD = new double*[numVectors]; |
1676 assert(DD); | 1845 assert(DD); |
1677 | 1846 |
1678 gettimeofday(&tv1, NULL); | 1847 gettimeofday(&tv1, NULL); |
1679 processedTracks=0; | 1848 unsigned processedTracks = 0; |
1680 unsigned successfulTracks=0; | 1849 unsigned successfulTracks=0; |
1681 | 1850 |
1682 double* qp; | 1851 double* qp; |
1683 double* sp; | 1852 double* sp; |
1684 double* dp; | 1853 double* dp; |
1792 | 1961 |
1793 // Search for minimum distance by shingles (concatenated vectors) | 1962 // Search for minimum distance by shingles (concatenated vectors) |
1794 for(j=0;j<=numVectors-wL;j+=HOP_SIZE) | 1963 for(j=0;j<=numVectors-wL;j+=HOP_SIZE) |
1795 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ | 1964 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ |
1796 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; | 1965 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; |
1797 if(verbosity>10) { | 1966 if(verbosity>9) { |
1798 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl; | 1967 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl; |
1799 } | 1968 } |
1800 // Gather chi^2 statistics | 1969 // Gather chi^2 statistics |
1801 if(thisDist<minSample) | 1970 if(thisDist<minSample) |
1802 minSample=thisDist; | 1971 minSample=thisDist; |
1808 logSampleSum+=log(thisDist); | 1977 logSampleSum+=log(thisDist); |
1809 } | 1978 } |
1810 | 1979 |
1811 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); | 1980 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); |
1812 // Power test | 1981 // Power test |
1813 if(!USE_THRESH || | 1982 if (usingPower) { |
1814 // Threshold on mean L2 of Q and S sequences | 1983 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) { |
1815 (USE_THRESH && qnPtr[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH && | 1984 thisDist = 1000000.0; |
1816 // Are both query and target windows above mean energy? | 1985 } |
1817 (qnPtr[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH ))) | 1986 } |
1818 thisDist=thisDist; // Computed above | |
1819 else | |
1820 thisDist=1000000.0; | |
1821 | 1987 |
1822 // k-NN match algorithm | 1988 // k-NN match algorithm |
1823 m=pointNN; | 1989 m=pointNN; |
1824 while(m--){ | 1990 while(m--){ |
1825 if(thisDist<=distances[m]) | 1991 if(thisDist<=distances[m]) |
1932 adbQueryResponse->result.Spos[k]=trackSIndexes[k]; | 2098 adbQueryResponse->result.Spos[k]=trackSIndexes[k]; |
1933 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); | 2099 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); |
1934 } | 2100 } |
1935 } | 2101 } |
1936 | 2102 |
1937 | |
1938 // Clean up | 2103 // Clean up |
1939 if(trackOffsetTable) | 2104 if(trackOffsetTable) |
1940 delete[] trackOffsetTable; | 2105 delete[] trackOffsetTable; |
1941 if(queryCopy) | 2106 if(queryCopy) |
1942 delete[] queryCopy; | 2107 delete[] queryCopy; |
1943 if(qNorm) | 2108 if(qNorm) |
1944 delete[] qNorm; | 2109 delete[] qNorm; |
1945 if(sNorm) | 2110 if(sNorm) |
1946 delete[] sNorm; | 2111 delete[] sNorm; |
1947 if(sMeanL2) | 2112 if(qPower) |
1948 delete[] sMeanL2; | 2113 delete[] qPower; |
2114 if(sPower) | |
2115 delete[] sPower; | |
1949 if(D) | 2116 if(D) |
1950 delete[] D; | 2117 delete[] D; |
1951 if(DD) | 2118 if(DD) |
1952 delete[] DD; | 2119 delete[] DD; |
1953 if(timesdata) | 2120 if(timesdata) |
1954 delete[] timesdata; | 2121 delete[] timesdata; |
1955 if(meanDBdur) | 2122 if(meanDBdur) |
1956 delete[] meanDBdur; | 2123 delete[] meanDBdur; |
1957 | |
1958 | |
1959 } | 2124 } |
1960 | 2125 |
1961 // Radius search between query and target tracks | 2126 // Radius search between query and target tracks |
1962 // efficient implementation based on matched filter | 2127 // efficient implementation based on matched filter |
1963 // assumes normed shingles | 2128 // assumes normed shingles |
1970 // we use stdout in this stub version | 2135 // we use stdout in this stub version |
1971 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 2136 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); |
1972 double* query = (double*)(indata+sizeof(int)); | 2137 double* query = (double*)(indata+sizeof(int)); |
1973 double* queryCopy = 0; | 2138 double* queryCopy = 0; |
1974 | 2139 |
1975 double qMeanL2; | |
1976 double* sMeanL2; | |
1977 | |
1978 unsigned USE_THRESH=0; | |
1979 double SILENCE_THRESH=0; | |
1980 double DIFF_THRESH=0; | |
1981 | |
1982 if(!(dbH->flags & O2_FLAG_L2NORM) ) | 2140 if(!(dbH->flags & O2_FLAG_L2NORM) ) |
1983 error("Database must be L2 normed for sequence query","use -l2norm"); | 2141 error("Database must be L2 normed for sequence query","use -l2norm"); |
1984 | 2142 |
1985 if(verbosity>1) { | 2143 if(verbosity>1) { |
1986 cerr << "performing norms ... "; cerr.flush(); | 2144 cerr << "performing norms ... "; cerr.flush(); |
1990 // Make a copy of the query | 2148 // Make a copy of the query |
1991 queryCopy = new double[numVectors*dbH->dim]; | 2149 queryCopy = new double[numVectors*dbH->dim]; |
1992 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | 2150 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); |
1993 qNorm = new double[numVectors]; | 2151 qNorm = new double[numVectors]; |
1994 sNorm = new double[dbVectors]; | 2152 sNorm = new double[dbVectors]; |
1995 sMeanL2=new double[dbH->numFiles]; | 2153 assert(qNorm&&sNorm&&queryCopy&&sequenceLength); |
1996 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); | |
1997 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | 2154 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); |
1998 query = queryCopy; | 2155 query = queryCopy; |
1999 | 2156 |
2000 // Make norm measurements relative to sequenceLength | 2157 // Make norm measurements relative to sequenceLength |
2001 unsigned w = sequenceLength-1; | 2158 unsigned w = sequenceLength-1; |
2002 unsigned i,j; | 2159 unsigned i,j; |
2003 double* ps; | |
2004 double tmp1,tmp2; | |
2005 | 2160 |
2006 // Copy the L2 norm values to core to avoid disk random access later on | 2161 // Copy the L2 norm values to core to avoid disk random access later on |
2007 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); | 2162 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); |
2008 double* snPtr = sNorm; | 2163 double* snPtr = sNorm; |
2009 double* qnPtr = qNorm; | 2164 double* qnPtr = qNorm; |
2165 | |
2166 double *sPower = 0, *qPower = 0; | |
2167 double *spPtr = 0, *qpPtr = 0; | |
2168 | |
2169 if (usingPower) { | |
2170 if(!(dbH->flags & O2_FLAG_POWER)) { | |
2171 error("database not power-enabled", dbName); | |
2172 } | |
2173 sPower = new double[dbVectors]; | |
2174 spPtr = sPower; | |
2175 memcpy(sPower, powerTable, dbVectors * sizeof(double)); | |
2176 } | |
2177 | |
2010 for(i=0; i<dbH->numFiles; i++){ | 2178 for(i=0; i<dbH->numFiles; i++){ |
2011 if(trackTable[i]>=sequenceLength){ | 2179 if(trackTable[i]>=sequenceLength) { |
2012 tmp1=*snPtr; | 2180 sequence_sum(snPtr, trackTable[i], sequenceLength); |
2013 j=1; | 2181 sequence_sqrt(snPtr, trackTable[i], sequenceLength); |
2014 w=sequenceLength-1; | 2182 if (usingPower) { |
2015 while(w--) | 2183 sequence_sum(spPtr, trackTable[i], sequenceLength); |
2016 *snPtr+=snPtr[j++]; | 2184 sequence_average(spPtr, trackTable[i], sequenceLength); |
2017 ps = snPtr+1; | 2185 } |
2018 w=trackTable[i]-sequenceLength; // +1 - 1 | 2186 } |
2019 while(w--){ | 2187 snPtr += trackTable[i]; |
2020 tmp2=*ps; | 2188 if (usingPower) { |
2021 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); | 2189 spPtr += trackTable[i]; |
2022 tmp1=tmp2; | 2190 } |
2023 ps++; | 2191 } |
2024 } | 2192 |
2025 ps = snPtr; | 2193 sequence_sum(qnPtr, numVectors, sequenceLength); |
2026 w=trackTable[i]-sequenceLength+1; | 2194 sequence_sqrt(qnPtr, numVectors, sequenceLength); |
2027 while(w--){ | 2195 |
2028 *ps=sqrt(*ps); | 2196 if (usingPower) { |
2029 ps++; | 2197 qPower = new double[numVectors]; |
2030 } | 2198 qpPtr = qPower; |
2031 } | 2199 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { |
2032 snPtr+=trackTable[i]; | 2200 error("error seeking to data", powerFileName, "lseek"); |
2033 } | 2201 } |
2034 | 2202 int count = read(powerfd, qPower, numVectors * sizeof(double)); |
2035 double* pn = sMeanL2; | 2203 if (count == -1) { |
2036 w=dbH->numFiles; | 2204 error("error reading data", powerFileName, "read"); |
2037 while(w--) | 2205 } |
2038 *pn++=0.0; | 2206 if ((unsigned) count != numVectors * sizeof(double)) { |
2039 ps=sNorm; | 2207 error("short read", powerFileName); |
2040 unsigned processedTracks=0; | 2208 } |
2041 for(i=0; i<dbH->numFiles; i++){ | 2209 |
2042 if(trackTable[i]>sequenceLength-1){ | 2210 sequence_sum(qpPtr, numVectors, sequenceLength); |
2043 w = trackTable[i]-sequenceLength+1; | 2211 sequence_average(qpPtr, numVectors, sequenceLength); |
2044 pn = sMeanL2+i; | 2212 } |
2045 *pn=0; | |
2046 while(w--) | |
2047 if(*ps>0) | |
2048 *pn+=*ps++; | |
2049 *pn/=trackTable[i]-sequenceLength+1; | |
2050 SILENCE_THRESH+=*pn; | |
2051 processedTracks++; | |
2052 } | |
2053 ps = sNorm + trackTable[i]; | |
2054 } | |
2055 if(verbosity>1) { | |
2056 cerr << "processedTracks: " << processedTracks << endl; | |
2057 } | |
2058 | |
2059 SILENCE_THRESH/=processedTracks; | |
2060 USE_THRESH=1; // Turn thresholding on | |
2061 DIFF_THRESH=SILENCE_THRESH; // mean shingle power | |
2062 SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE | |
2063 if(verbosity>4) { | |
2064 cerr << "silence thresh: " << SILENCE_THRESH; | |
2065 } | |
2066 w=sequenceLength-1; | |
2067 i=1; | |
2068 tmp1=*qnPtr; | |
2069 while(w--) | |
2070 *qnPtr+=qnPtr[i++]; | |
2071 ps = qnPtr+1; | |
2072 w=numVectors-sequenceLength; // +1 -1 | |
2073 while(w--){ | |
2074 tmp2=*ps; | |
2075 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); | |
2076 tmp1=tmp2; | |
2077 ps++; | |
2078 } | |
2079 ps = qnPtr; | |
2080 qMeanL2 = 0; | |
2081 w=numVectors-sequenceLength+1; | |
2082 while(w--){ | |
2083 *ps=sqrt(*ps); | |
2084 qMeanL2+=*ps++; | |
2085 } | |
2086 qMeanL2 /= numVectors-sequenceLength+1; | |
2087 | 2213 |
2088 if(verbosity>1) { | 2214 if(verbosity>1) { |
2089 cerr << "done." << endl; | 2215 cerr << "done." << endl; |
2090 } | 2216 } |
2091 | 2217 |
2164 error("queryPoint > numVectors-wL+1 in query"); | 2290 error("queryPoint > numVectors-wL+1 in query"); |
2165 else{ | 2291 else{ |
2166 if(verbosity>1) { | 2292 if(verbosity>1) { |
2167 cerr << "query point: " << queryPoint << endl; cerr.flush(); | 2293 cerr << "query point: " << queryPoint << endl; cerr.flush(); |
2168 } | 2294 } |
2169 query=query+queryPoint*dbH->dim; | 2295 query = query + queryPoint*dbH->dim; |
2170 qnPtr=qnPtr+queryPoint; | 2296 qnPtr = qnPtr + queryPoint; |
2297 if (usingPower) { | |
2298 qpPtr = qpPtr + queryPoint; | |
2299 } | |
2171 numVectors=wL; | 2300 numVectors=wL; |
2172 } | 2301 } |
2173 | 2302 |
2174 double ** D = 0; // Differences query and target | 2303 double ** D = 0; // Differences query and target |
2175 double ** DD = 0; // Matched filter distance | 2304 double ** DD = 0; // Matched filter distance |
2178 assert(D); | 2307 assert(D); |
2179 DD = new double*[numVectors]; | 2308 DD = new double*[numVectors]; |
2180 assert(DD); | 2309 assert(DD); |
2181 | 2310 |
2182 gettimeofday(&tv1, NULL); | 2311 gettimeofday(&tv1, NULL); |
2183 processedTracks=0; | 2312 unsigned processedTracks = 0; |
2184 unsigned successfulTracks=0; | 2313 unsigned successfulTracks=0; |
2185 | 2314 |
2186 double* qp; | 2315 double* qp; |
2187 double* sp; | 2316 double* sp; |
2188 double* dp; | 2317 double* dp; |
2296 | 2425 |
2297 // Search for minimum distance by shingles (concatenated vectors) | 2426 // Search for minimum distance by shingles (concatenated vectors) |
2298 for(j=0;j<=numVectors-wL;j+=HOP_SIZE) | 2427 for(j=0;j<=numVectors-wL;j+=HOP_SIZE) |
2299 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ | 2428 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ |
2300 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; | 2429 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; |
2301 if(verbosity>10) { | 2430 if(verbosity>9) { |
2302 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl; | 2431 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl; |
2303 } | 2432 } |
2304 // Gather chi^2 statistics | 2433 // Gather chi^2 statistics |
2305 if(thisDist<minSample) | 2434 if(thisDist<minSample) |
2306 minSample=thisDist; | 2435 minSample=thisDist; |
2312 logSampleSum+=log(thisDist); | 2441 logSampleSum+=log(thisDist); |
2313 } | 2442 } |
2314 | 2443 |
2315 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); | 2444 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); |
2316 // Power test | 2445 // Power test |
2317 if(!USE_THRESH || | 2446 if (usingPower) { |
2318 // Threshold on mean L2 of Q and S sequences | 2447 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) { |
2319 (USE_THRESH && qnPtr[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH && | 2448 thisDist = 1000000.0; |
2320 // Are both query and target windows above mean energy? | 2449 } |
2321 (qnPtr[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH ))) | 2450 } |
2322 thisDist=thisDist; // Computed above | 2451 |
2323 else | |
2324 thisDist=1000000.0; | |
2325 if(thisDist>=0 && thisDist<=radius){ | 2452 if(thisDist>=0 && thisDist<=radius){ |
2326 distances[0]++; // increment count | 2453 distances[0]++; // increment count |
2327 break; // only need one track point per query point | 2454 break; // only need one track point per query point |
2328 } | 2455 } |
2329 } | 2456 } |
2422 delete[] queryCopy; | 2549 delete[] queryCopy; |
2423 if(qNorm) | 2550 if(qNorm) |
2424 delete[] qNorm; | 2551 delete[] qNorm; |
2425 if(sNorm) | 2552 if(sNorm) |
2426 delete[] sNorm; | 2553 delete[] sNorm; |
2427 if(sMeanL2) | 2554 if(qPower) |
2428 delete[] sMeanL2; | 2555 delete[] qPower; |
2556 if(sPower) | |
2557 delete[] sPower; | |
2429 if(D) | 2558 if(D) |
2430 delete[] D; | 2559 delete[] D; |
2431 if(DD) | 2560 if(DD) |
2432 delete[] DD; | 2561 delete[] DD; |
2433 if(timesdata) | 2562 if(timesdata) |
2434 delete[] timesdata; | 2563 delete[] timesdata; |
2435 if(meanDBdur) | 2564 if(meanDBdur) |
2436 delete[] meanDBdur; | 2565 delete[] meanDBdur; |
2437 | |
2438 | |
2439 } | 2566 } |
2440 | 2567 |
2441 // Unit norm block of features | 2568 // Unit norm block of features |
2442 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){ | 2569 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){ |
2443 unsigned d; | 2570 unsigned d; |
2643 soap_receiver_fault(soap, err, ""); | 2770 soap_receiver_fault(soap, err, ""); |
2644 return SOAP_FAULT; | 2771 return SOAP_FAULT; |
2645 } | 2772 } |
2646 } | 2773 } |
2647 | 2774 |
2775 int adb__sequenceQuery(struct soap* soap, xsd__string dbName, xsd__string qKey, | |
2776 adb__sequenceQueryParms *parms, | |
2777 adb__queryResponse &adbQueryResponse) { | |
2778 | |
2779 char qPosStr[256]; | |
2780 char pointNNStr[256]; | |
2781 char trackNNStr[256]; | |
2782 char seqLenStr[256]; | |
2783 char relative_thresholdStr[256]; | |
2784 char absolute_thresholdStr[256]; | |
2785 | |
2786 /* When the branch is merged, move this to a header and use it | |
2787 elsewhere */ | |
2788 #define INTSTRINGIFY(val, str) \ | |
2789 snprintf(str, 256, "%d", val); | |
2790 #define DOUBLESTRINGIFY(val, str) \ | |
2791 snprintf(str, 256, "%f", val); | |
2792 | |
2793 INTSTRINGIFY(parms->qPos, qPosStr); | |
2794 INTSTRINGIFY(parms->pointNN, pointNNStr); | |
2795 INTSTRINGIFY(parms->segNN, trackNNStr); | |
2796 /* FIXME: decide which of segLen and seqLen should live */ | |
2797 INTSTRINGIFY(parms->segLen, seqLenStr); | |
2798 | |
2799 DOUBLESTRINGIFY(parms->relative_threshold, relative_thresholdStr); | |
2800 DOUBLESTRINGIFY(parms->absolute_threshold, absolute_thresholdStr); | |
2801 | |
2802 const char *argv[] = { | |
2803 "./audioDB", | |
2804 COM_QUERY, | |
2805 "sequence", | |
2806 COM_DATABASE, | |
2807 dbName, | |
2808 COM_FEATURES, | |
2809 qKey, | |
2810 COM_KEYLIST, | |
2811 /* FIXME: when this branch is merged, use ENSURE_STRING */ | |
2812 parms->keyList==0?"":parms->keyList, | |
2813 COM_TIMES, | |
2814 parms->timesFileName==0?"":parms->timesFileName, | |
2815 COM_QUERYPOWER, | |
2816 parms->powerFileName==0?"":parms->powerFileName, | |
2817 COM_QPOINT, | |
2818 qPosStr, | |
2819 COM_POINTNN, | |
2820 pointNNStr, | |
2821 COM_TRACKNN, | |
2822 trackNNStr, | |
2823 COM_SEQLEN, | |
2824 seqLenStr, | |
2825 COM_RELATIVE_THRESH, | |
2826 relative_thresholdStr, | |
2827 COM_ABSOLUTE_THRESH, | |
2828 absolute_thresholdStr | |
2829 }; | |
2830 | |
2831 const unsigned argc = 25; | |
2832 | |
2833 try { | |
2834 audioDB(argc, (char* const*)argv, &adbQueryResponse); | |
2835 return SOAP_OK; | |
2836 } catch (char *err) { | |
2837 soap_receiver_fault(soap, err, ""); | |
2838 return SOAP_FAULT; | |
2839 } | |
2840 } | |
2841 | |
2648 int main(const unsigned argc, char* const argv[]){ | 2842 int main(const unsigned argc, char* const argv[]){ |
2649 audioDB(argc, argv); | 2843 audioDB(argc, argv); |
2650 } | 2844 } |
2651 |