comparison audioDB.cpp @ 193:f9d16137e704

Merge powertable branch -r168:227 to trunk.
author mas01cr
date Wed, 21 Nov 2007 11:35:44 +0000
parents cdd441dcc9a8
children 0e75deb7d4d1
comparison
equal deleted inserted replaced
170:10bcea4e5c40 193:f9d16137e704
70 status(dbName); 70 status(dbName);
71 71
72 else if(O2_ACTION(COM_L2NORM)) 72 else if(O2_ACTION(COM_L2NORM))
73 l2norm(dbName); 73 l2norm(dbName);
74 74
75 else if(O2_ACTION(COM_POWER))
76 power_flag(dbName);
77
75 else if(O2_ACTION(COM_DUMP)) 78 else if(O2_ACTION(COM_DUMP))
76 dump(dbName); 79 dump(dbName);
77 80
78 else 81 else
79 error("Unrecognized command",command); 82 error("Unrecognized command",command);
218 command=COM_L2NORM; 221 command=COM_L2NORM;
219 dbName=args_info.database_arg; 222 dbName=args_info.database_arg;
220 return 0; 223 return 0;
221 } 224 }
222 225
226 if(args_info.POWER_given){
227 command=COM_POWER;
228 dbName=args_info.database_arg;
229 return 0;
230 }
231
223 if(args_info.INSERT_given){ 232 if(args_info.INSERT_given){
224 command=COM_INSERT; 233 command=COM_INSERT;
225 dbName=args_info.database_arg; 234 dbName=args_info.database_arg;
226 inFile=args_info.features_arg; 235 inFile=args_info.features_arg;
227 if(args_info.key_given) 236 if(args_info.key_given)
232 if(!(timesFile = new ifstream(timesFileName,ios::in))) 241 if(!(timesFile = new ifstream(timesFileName,ios::in)))
233 error("Could not open times file for reading", timesFileName); 242 error("Could not open times file for reading", timesFileName);
234 usingTimes=1; 243 usingTimes=1;
235 } 244 }
236 } 245 }
246 if (args_info.power_given) {
247 powerFileName = args_info.power_arg;
248 if (strlen(powerFileName) > 0) {
249 if (!(powerfd = open(powerFileName, O_RDONLY))) {
250 error("Could not open power file for reading", powerFileName, "open");
251 }
252 usingPower = 1;
253 }
254 }
237 return 0; 255 return 0;
238 } 256 }
239 257
240 if(args_info.BATCHINSERT_given){ 258 if(args_info.BATCHINSERT_given){
241 command=COM_BATCHINSERT; 259 command=COM_BATCHINSERT;
257 timesFileName=args_info.timesList_arg; 275 timesFileName=args_info.timesList_arg;
258 if(strlen(timesFileName)>0){ 276 if(strlen(timesFileName)>0){
259 if(!(timesFile = new ifstream(timesFileName,ios::in))) 277 if(!(timesFile = new ifstream(timesFileName,ios::in)))
260 error("Could not open timesList file for reading", timesFileName); 278 error("Could not open timesList file for reading", timesFileName);
261 usingTimes=1; 279 usingTimes=1;
280 }
281 }
282 if(args_info.powerList_given){
283 powerFileName=args_info.powerList_arg;
284 if(strlen(powerFileName)>0){
285 if(!(powerFile = new ifstream(powerFileName,ios::in)))
286 error("Could not open powerList file for reading", powerFileName);
287 usingPower=1;
262 } 288 }
263 } 289 }
264 return 0; 290 return 0;
265 } 291 }
266 292
280 timesFileName=args_info.times_arg; 306 timesFileName=args_info.times_arg;
281 if(strlen(timesFileName)>0){ 307 if(strlen(timesFileName)>0){
282 if(!(timesFile = new ifstream(timesFileName,ios::in))) 308 if(!(timesFile = new ifstream(timesFileName,ios::in)))
283 error("Could not open times file for reading", timesFileName); 309 error("Could not open times file for reading", timesFileName);
284 usingTimes=1; 310 usingTimes=1;
311 }
312 }
313
314 if(args_info.power_given){
315 powerFileName=args_info.power_arg;
316 if(strlen(powerFileName)>0){
317 if (!(powerfd = open(powerFileName, O_RDONLY))) {
318 error("Could not open power file for reading", powerFileName, "open");
319 }
320 usingPower = 1;
285 } 321 }
286 } 322 }
287 323
288 // query type 324 // query type
289 if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0) 325 if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0)
316 } 352 }
317 sequenceHop = args_info.sequencehop_arg; 353 sequenceHop = args_info.sequencehop_arg;
318 if(sequenceHop < 1 || sequenceHop > 1000) { 354 if(sequenceHop < 1 || sequenceHop > 1000) {
319 error("seqhop out of range: 1 <= seqhop <= 1000"); 355 error("seqhop out of range: 1 <= seqhop <= 1000");
320 } 356 }
357
358 if (args_info.absolute_threshold_given) {
359 if (args_info.absolute_threshold_arg >= 0) {
360 error("absolute threshold out of range: should be negative");
361 }
362 use_absolute_threshold = true;
363 absolute_threshold = args_info.absolute_threshold_arg;
364 }
365 if (args_info.relative_threshold_given) {
366 use_relative_threshold = true;
367 relative_threshold = args_info.relative_threshold_arg;
368 }
321 return 0; 369 return 0;
322 } 370 }
323 return -1; // no command found 371 return -1; // no command found
324 } 372 }
325 373
410 dbH->fileTableOffset = ALIGN_UP(O2_HEADERSIZE, 8); 458 dbH->fileTableOffset = ALIGN_UP(O2_HEADERSIZE, 8);
411 dbH->trackTableOffset = ALIGN_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles, 8); 459 dbH->trackTableOffset = ALIGN_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles, 8);
412 dbH->dataOffset = ALIGN_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles, 8); 460 dbH->dataOffset = ALIGN_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles, 8);
413 dbH->l2normTableOffset = ALIGN_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8); 461 dbH->l2normTableOffset = ALIGN_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8);
414 dbH->timesTableOffset = ALIGN_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8); 462 dbH->timesTableOffset = ALIGN_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8);
463 dbH->powerTableOffset = ALIGN_DOWN(dbH->timesTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8);
415 dbH->dbSize = size; 464 dbH->dbSize = size;
416 465
417 memcpy (db, dbH, O2_HEADERSIZE); 466 memcpy (db, dbH, O2_HEADERSIZE);
418 if(verbosity) { 467 if(verbosity) {
419 cerr << COM_CREATE << " " << dbName << endl; 468 cerr << COM_CREATE << " " << dbName << endl;
468 fileTable = (char *) (db + dbH->fileTableOffset); 517 fileTable = (char *) (db + dbH->fileTableOffset);
469 trackTable = (unsigned *) (db + dbH->trackTableOffset); 518 trackTable = (unsigned *) (db + dbH->trackTableOffset);
470 dataBuf = (double *) (db + dbH->dataOffset); 519 dataBuf = (double *) (db + dbH->dataOffset);
471 l2normTable = (double *) (db + dbH->l2normTableOffset); 520 l2normTable = (double *) (db + dbH->l2normTableOffset);
472 timesTable = (double *) (db + dbH->timesTableOffset); 521 timesTable = (double *) (db + dbH->timesTableOffset);
522 powerTable = (double *) (db + dbH->powerTableOffset);
473 } 523 }
474 524
475 void audioDB::initInputFile (const char *inFile) { 525 void audioDB::initInputFile (const char *inFile) {
476 if (inFile) { 526 if (inFile) {
477 if ((infid = open(inFile, O_RDONLY)) < 0) { 527 if ((infid = open(inFile, O_RDONLY)) < 0) {
520 570
521 initTables(dbName, 1, inFile); 571 initTables(dbName, 1, inFile);
522 572
523 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 573 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
524 error("Must use timestamps with timestamped database","use --times"); 574 error("Must use timestamps with timestamped database","use --times");
575
576 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
577 error("Must use power with power-enabled database", dbName);
525 578
526 // Check that there is room for at least 1 more file 579 // Check that there is room for at least 1 more file
527 if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int))) 580 if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int)))
528 error("Insert failed: no more room in database", inFile); 581 error("Insert failed: no more room in database", inFile);
529 582
564 // Check times status and insert times from file 617 // Check times status and insert times from file
565 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); 618 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
566 double* timesdata=timesTable+timesoffset; 619 double* timesdata=timesTable+timesoffset;
567 assert(timesdata+numVectors<l2normTable); 620 assert(timesdata+numVectors<l2normTable);
568 insertTimeStamps(numVectors, timesFile, timesdata); 621 insertTimeStamps(numVectors, timesFile, timesdata);
622
623 double *powerdata = powerTable + timesoffset;
624 insertPowerData(numVectors, powerfd, powerdata);
569 625
570 // Increment file count 626 // Increment file count
571 dbH->numFiles++; 627 dbH->numFiles++;
572 628
573 // Update Header information 629 // Update Header information
650 } 706 }
651 } 707 }
652 } 708 }
653 } 709 }
654 710
711 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
712 if (usingPower) {
713 if (!(dbH->flags & O2_FLAG_POWER)) {
714 error("Cannot insert power data on non-power DB", dbName);
715 }
716
717 int one;
718 unsigned int count;
719
720 count = read(powerfd, &one, sizeof(unsigned int));
721 if (count != sizeof(unsigned int)) {
722 error("powerfd read failed", "int", "read");
723 }
724 if (one != 1) {
725 error("dimensionality of power file not 1", powerFileName);
726 }
727
728 // FIXME: should check that the powerfile is the right size for
729 // this. -- CSR, 2007-10-30
730 count = read(powerfd, powerdata, numVectors * sizeof(double));
731 if (count != numVectors * sizeof(double)) {
732 error("powerfd read failed", "double", "read");
733 }
734 }
735 }
736
655 void audioDB::batchinsert(const char* dbName, const char* inFile) { 737 void audioDB::batchinsert(const char* dbName, const char* inFile) {
656 738
657 initDBHeader(dbName, true); 739 initDBHeader(dbName, true);
658 740
659 if(!key) 741 if(!key)
660 key=inFile; 742 key=inFile;
661 ifstream *filesIn = 0; 743 ifstream *filesIn = 0;
662 ifstream *keysIn = 0; 744 ifstream *keysIn = 0;
663 ifstream* thisTimesFile = 0; 745 ifstream* thisTimesFile = 0;
746 int thispowerfd = 0;
664 747
665 if(!(filesIn = new ifstream(inFile))) 748 if(!(filesIn = new ifstream(inFile)))
666 error("Could not open batch in file", inFile); 749 error("Could not open batch in file", inFile);
667 if(key && key!=inFile) 750 if(key && key!=inFile)
668 if(!(keysIn = new ifstream(key))) 751 if(!(keysIn = new ifstream(key)))
669 error("Could not open batch key file",key); 752 error("Could not open batch key file",key);
670 753
671 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 754 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
672 error("Must use timestamps with timestamped database","use --times"); 755 error("Must use timestamps with timestamped database","use --times");
673 756
757 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
758 error("Must use power with power-enabled database", dbName);
759
674 unsigned totalVectors=0; 760 unsigned totalVectors=0;
675 char *thisKey = new char[MAXSTR]; 761 char *thisKey = new char[MAXSTR];
676 char *thisFile = new char[MAXSTR]; 762 char *thisFile = new char[MAXSTR];
677 char *thisTimesFileName = new char[MAXSTR]; 763 char *thisTimesFileName = new char[MAXSTR];
764 char *thisPowerFileName = new char[MAXSTR];
678 765
679 do{ 766 do{
680 filesIn->getline(thisFile,MAXSTR); 767 filesIn->getline(thisFile,MAXSTR);
681 if(key && key!=inFile) 768 if(key && key!=inFile)
682 keysIn->getline(thisKey,MAXSTR); 769 keysIn->getline(thisKey,MAXSTR);
683 else 770 else
684 thisKey = thisFile; 771 thisKey = thisFile;
685 if(usingTimes) 772 if(usingTimes)
686 timesFile->getline(thisTimesFileName,MAXSTR); 773 timesFile->getline(thisTimesFileName,MAXSTR);
774 if(usingPower)
775 powerFile->getline(thisPowerFileName, MAXSTR);
687 776
688 if(filesIn->eof()) 777 if(filesIn->eof())
689 break; 778 break;
690 779
691 initInputFile(thisFile); 780 initInputFile(thisFile);
715 if(!numVectors){ 804 if(!numVectors){
716 if(verbosity) { 805 if(verbosity) {
717 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; 806 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl;
718 } 807 }
719 } 808 }
720 else{ 809 else{
721 if(usingTimes){ 810 if(usingTimes){
722 if(timesFile->eof()) 811 if(timesFile->eof())
723 error("not enough timestamp files in timesList"); 812 error("not enough timestamp files in timesList");
724 thisTimesFile=new ifstream(thisTimesFileName,ios::in); 813 thisTimesFile=new ifstream(thisTimesFileName,ios::in);
725 if(!thisTimesFile->is_open()) 814 if(!thisTimesFile->is_open())
730 assert(timesdata+numVectors<l2normTable); 819 assert(timesdata+numVectors<l2normTable);
731 insertTimeStamps(numVectors,thisTimesFile,timesdata); 820 insertTimeStamps(numVectors,thisTimesFile,timesdata);
732 if(thisTimesFile) 821 if(thisTimesFile)
733 delete thisTimesFile; 822 delete thisTimesFile;
734 } 823 }
735 824
825 if (usingPower) {
826 if(powerFile->eof()) {
827 error("not enough power files in powerList", powerFileName);
828 }
829 thispowerfd = open(thisPowerFileName, O_RDONLY);
830 if (thispowerfd < 0) {
831 error("failed to open power file", thisPowerFileName);
832 }
833 unsigned insertoffset = dbH->length;
834 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
835 double *powerdata = powerTable + poweroffset;
836 insertPowerData(numVectors, thispowerfd, powerdata);
837 if (0 < thispowerfd) {
838 close(thispowerfd);
839 }
840 }
736 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); 841 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));
737 842
738 unsigned insertoffset = dbH->length;// Store current state 843 unsigned insertoffset = dbH->length;// Store current state
739 844
740 // Increment file count 845 // Increment file count
877 982
878 if((chdir(output)) < 0) { 983 if((chdir(output)) < 0) {
879 error("error changing working directory", output, "chdir"); 984 error("error changing working directory", output, "chdir");
880 } 985 }
881 986
882 int fLfd, tLfd = 0, kLfd; 987 int fLfd, tLfd = 0, pLfd = 0, kLfd;
883 FILE *fLFile, *tLFile = 0, *kLFile; 988 FILE *fLFile, *tLFile = 0, *pLFile = 0, *kLFile;
884 989
885 if ((fLfd = open("featureList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { 990 if ((fLfd = open("featureList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
886 error("error creating featureList file", "featureList.txt", "open"); 991 error("error creating featureList file", "featureList.txt", "open");
887 } 992 }
993
888 int times = dbH->flags & O2_FLAG_TIMES; 994 int times = dbH->flags & O2_FLAG_TIMES;
889 if (times) { 995 if (times) {
890 if ((tLfd = open("timesList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { 996 if ((tLfd = open("timesList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
891 error("error creating timesList file", "timesList.txt", "open"); 997 error("error creating timesList file", "timesList.txt", "open");
892 } 998 }
893 } 999 }
1000
1001 int power = dbH->flags & O2_FLAG_POWER;
1002 if (power) {
1003 if ((pLfd = open("powerList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
1004 error("error creating powerList file", "powerList.txt", "open");
1005 }
1006 }
1007
894 if ((kLfd = open("keyList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { 1008 if ((kLfd = open("keyList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
895 error("error creating keyList file", "keyList.txt", "open"); 1009 error("error creating keyList file", "keyList.txt", "open");
896 } 1010 }
897 1011
898 /* can these fail? I sincerely hope not. */ 1012 /* can these fail? I sincerely hope not. */
899 fLFile = fdopen(fLfd, "w"); 1013 fLFile = fdopen(fLfd, "w");
900 if (times) { 1014 if (times) {
901 tLFile = fdopen(tLfd, "w"); 1015 tLFile = fdopen(tLfd, "w");
902 } 1016 }
1017 if (power) {
1018 pLFile = fdopen(pLfd, "w");
1019 }
903 kLFile = fdopen(kLfd, "w"); 1020 kLFile = fdopen(kLfd, "w");
904 1021
905 char *fName = new char[256]; 1022 char *fName = new char[256];
906 int ffd; 1023 int ffd, pfd;
907 FILE *tFile; 1024 FILE *tFile;
908 unsigned pos = 0; 1025 unsigned pos = 0;
909 for(unsigned k = 0; k < dbH->numFiles; k++) { 1026 for(unsigned k = 0; k < dbH->numFiles; k++) {
910 fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLESIZE); 1027 fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLESIZE);
911 snprintf(fName, 256, "%05d.features", k); 1028 snprintf(fName, 256, "%05d.features", k);
934 // -- CSR, 2007-10-19 1051 // -- CSR, 2007-10-19
935 fprintf(tFile, "%.16e\n", *(timesTable + pos + i)); 1052 fprintf(tFile, "%.16e\n", *(timesTable + pos + i));
936 } 1053 }
937 fprintf(tLFile, "%s\n", fName); 1054 fprintf(tLFile, "%s\n", fName);
938 } 1055 }
1056
1057 if (power) {
1058 uint32_t one = 1;
1059 snprintf(fName, 256, "%05d.power", k);
1060 if ((pfd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
1061 error("error creating power file", fName, "open");
1062 }
1063 if ((write(pfd, &one, sizeof(uint32_t))) < 0) {
1064 error("error writing one", fName, "write");
1065 }
1066 if ((write(pfd, powerTable + pos, trackTable[k] * sizeof(double))) < 0) {
1067 error("error writing data", fName, "write");
1068 }
1069 fprintf(pLFile, "%s\n", fName);
1070 close(pfd);
1071 }
939 1072
940 pos += trackTable[k]; 1073 pos += trackTable[k];
941 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; 1074 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl;
942 } 1075 }
943 1076
952 if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\ 1085 if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\
953 \"${AUDIODB}\" -d \"$1\" -N --size=%d\n", dbH->dbSize / 1000000); 1086 \"${AUDIODB}\" -d \"$1\" -N --size=%d\n", dbH->dbSize / 1000000);
954 if(dbH->flags & O2_FLAG_L2NORM) { 1087 if(dbH->flags & O2_FLAG_L2NORM) {
955 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n"); 1088 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n");
956 } 1089 }
1090 if(power) {
1091 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -P\n");
1092 }
957 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt"); 1093 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt");
958 if(times) { 1094 if(times) {
959 fprintf(scriptFile, " -T timesList.txt"); 1095 fprintf(scriptFile, " -T timesList.txt");
960 } 1096 }
1097 if(power) {
1098 fprintf(scriptFile, " -W powerList.txt");
1099 }
961 fprintf(scriptFile, "\n"); 1100 fprintf(scriptFile, "\n");
962 fclose(scriptFile); 1101 fclose(scriptFile);
963 1102
964 if((chdir(cwd)) < 0) { 1103 if((chdir(cwd)) < 0) {
965 error("error changing working directory", cwd, "chdir"); 1104 error("error changing working directory", cwd, "chdir");
966 } 1105 }
967 1106
968 fclose(fLFile); 1107 fclose(fLFile);
969 if(times) { 1108 if(times) {
970 fclose(tLFile); 1109 fclose(tLFile);
1110 }
1111 if(power) {
1112 fclose(pLFile);
971 } 1113 }
972 fclose(kLFile); 1114 fclose(kLFile);
973 delete[] fName; 1115 delete[] fName;
974 1116
975 status(dbName); 1117 status(dbName);
983 } 1125 }
984 // Update database flags 1126 // Update database flags
985 dbH->flags = dbH->flags|O2_FLAG_L2NORM; 1127 dbH->flags = dbH->flags|O2_FLAG_L2NORM;
986 memcpy (db, dbH, O2_HEADERSIZE); 1128 memcpy (db, dbH, O2_HEADERSIZE);
987 } 1129 }
988 1130
1131 void audioDB::power_flag(const char *dbName) {
1132 initTables(dbName, true, 0);
1133 if (dbH->length > 0) {
1134 error("cannot turn on power storage for non-empty database", dbName);
1135 }
1136 dbH->flags |= O2_FLAG_POWER;
1137 memcpy(db, dbH, O2_HEADERSIZE);
1138 }
1139
1140 bool audioDB::powers_acceptable(double p1, double p2) {
1141 if (use_absolute_threshold) {
1142 if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) {
1143 return false;
1144 }
1145 }
1146 if (use_relative_threshold) {
1147 if (fabs(p1-p2) > fabs(relative_threshold)) {
1148 return false;
1149 }
1150 }
1151 return true;
1152 }
989 1153
990 1154
991 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ 1155 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
992 switch(queryType){ 1156 switch(queryType){
993 case O2_POINT_QUERY: 1157 case O2_POINT_QUERY:
1448 if(meanDBdur) 1612 if(meanDBdur)
1449 delete meanDBdur; 1613 delete meanDBdur;
1450 1614
1451 } 1615 }
1452 1616
1617 // This is a common pattern in sequence queries: what we are doing is
1618 // taking a window of length seqlen over a buffer of length length,
1619 // and placing the sum of the elements in that window in the first
1620 // element of the window: thus replacing all but the last seqlen
1621 // elements in the buffer the corresponding windowed sum.
1622 void audioDB::sequence_sum(double *buffer, int length, int seqlen) {
1623 double tmp1, tmp2, *ps;
1624 int j, w;
1625
1626 tmp1 = *buffer;
1627 j = 1;
1628 w = seqlen - 1;
1629 while(w--) {
1630 *buffer += buffer[j++];
1631 }
1632 ps = buffer + 1;
1633 w = length - seqlen; // +1 - 1
1634 while(w--) {
1635 tmp2 = *ps;
1636 *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1);
1637 tmp1 = tmp2;
1638 ps++;
1639 }
1640 }
1641
1642 void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) {
1643 int w = length - seqlen + 1;
1644 while(w--) {
1645 *buffer = sqrt(*buffer);
1646 buffer++;
1647 }
1648 }
1649
1650 void audioDB::sequence_average(double *buffer, int length, int seqlen) {
1651 int w = length - seqlen + 1;
1652 while(w--) {
1653 *buffer /= seqlen;
1654 buffer++;
1655 }
1656 }
1453 1657
1454 // k nearest-neighbor (k-NN) search between query and target tracks 1658 // k nearest-neighbor (k-NN) search between query and target tracks
1455 // efficient implementation based on matched filter 1659 // efficient implementation based on matched filter
1456 // assumes normed shingles 1660 // assumes normed shingles
1457 // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track 1661 // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track
1463 // we use stdout in this stub version 1667 // we use stdout in this stub version
1464 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); 1668 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
1465 double* query = (double*)(indata+sizeof(int)); 1669 double* query = (double*)(indata+sizeof(int));
1466 double* queryCopy = 0; 1670 double* queryCopy = 0;
1467 1671
1468 double qMeanL2;
1469 double* sMeanL2;
1470
1471 unsigned USE_THRESH=0;
1472 double SILENCE_THRESH=0;
1473 double DIFF_THRESH=0;
1474
1475 if(!(dbH->flags & O2_FLAG_L2NORM) ) 1672 if(!(dbH->flags & O2_FLAG_L2NORM) )
1476 error("Database must be L2 normed for sequence query","use -L2NORM"); 1673 error("Database must be L2 normed for sequence query","use -L2NORM");
1477 1674
1478 if(numVectors<sequenceLength) 1675 if(numVectors<sequenceLength)
1479 error("Query shorter than requested sequence length", "maybe use -l"); 1676 error("Query shorter than requested sequence length", "maybe use -l");
1486 // Make a copy of the query 1683 // Make a copy of the query
1487 queryCopy = new double[numVectors*dbH->dim]; 1684 queryCopy = new double[numVectors*dbH->dim];
1488 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); 1685 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
1489 qNorm = new double[numVectors]; 1686 qNorm = new double[numVectors];
1490 sNorm = new double[dbVectors]; 1687 sNorm = new double[dbVectors];
1491 sMeanL2=new double[dbH->numFiles]; 1688 assert(qNorm&&sNorm&&queryCopy&&sequenceLength);
1492 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength);
1493 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); 1689 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
1494 query = queryCopy; 1690 query = queryCopy;
1495 1691
1496 // Make norm measurements relative to sequenceLength 1692 // Make norm measurements relative to sequenceLength
1497 unsigned w = sequenceLength-1; 1693 unsigned w = sequenceLength-1;
1498 unsigned i,j; 1694 unsigned i,j;
1499 double* ps;
1500 double tmp1,tmp2;
1501 1695
1502 // Copy the L2 norm values to core to avoid disk random access later on 1696 // Copy the L2 norm values to core to avoid disk random access later on
1503 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); 1697 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
1504 double* qnPtr = qNorm; 1698 double* qnPtr = qNorm;
1505 double* snPtr = sNorm; 1699 double* snPtr = sNorm;
1700
1701 double *sPower = 0, *qPower = 0;
1702 double *spPtr = 0, *qpPtr = 0;
1703
1704 if (usingPower) {
1705 if (!(dbH->flags & O2_FLAG_POWER)) {
1706 error("database not power-enabled", dbName);
1707 }
1708 sPower = new double[dbVectors];
1709 spPtr = sPower;
1710 memcpy(sPower, powerTable, dbVectors * sizeof(double));
1711 }
1712
1506 for(i=0; i<dbH->numFiles; i++){ 1713 for(i=0; i<dbH->numFiles; i++){
1507 if(trackTable[i]>=sequenceLength){ 1714 if(trackTable[i]>=sequenceLength) {
1508 tmp1=*snPtr; 1715 sequence_sum(snPtr, trackTable[i], sequenceLength);
1509 j=1; 1716 sequence_sqrt(snPtr, trackTable[i], sequenceLength);
1510 w=sequenceLength-1; 1717
1511 while(w--) 1718 if (usingPower) {
1512 *snPtr+=snPtr[j++]; 1719 sequence_sum(spPtr, trackTable[i], sequenceLength);
1513 ps = snPtr+1; 1720 sequence_average(spPtr, trackTable[i], sequenceLength);
1514 w=trackTable[i]-sequenceLength; // +1 - 1 1721 }
1515 while(w--){ 1722 }
1516 tmp2=*ps; 1723 snPtr += trackTable[i];
1517 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); 1724 if (usingPower) {
1518 tmp1=tmp2; 1725 spPtr += trackTable[i];
1519 ps++; 1726 }
1520 } 1727 }
1521 ps = snPtr; 1728
1522 w=trackTable[i]-sequenceLength+1; 1729 sequence_sum(qnPtr, numVectors, sequenceLength);
1523 while(w--){ 1730 sequence_sqrt(qnPtr, numVectors, sequenceLength);
1524 *ps=sqrt(*ps); 1731
1525 ps++; 1732 if (usingPower) {
1526 } 1733 qPower = new double[numVectors];
1527 } 1734 qpPtr = qPower;
1528 snPtr+=trackTable[i]; 1735 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
1529 } 1736 error("error seeking to data", powerFileName, "lseek");
1530 1737 }
1531 double* pn = sMeanL2; 1738 int count = read(powerfd, qPower, numVectors * sizeof(double));
1532 w=dbH->numFiles; 1739 if (count == -1) {
1533 while(w--) 1740 error("error reading data", powerFileName, "read");
1534 *pn++=0.0; 1741 }
1535 ps=sNorm; 1742 if ((unsigned) count != numVectors * sizeof(double)) {
1536 unsigned processedTracks=0; 1743 error("short read", powerFileName);
1537 for(i=0; i<dbH->numFiles; i++){ 1744 }
1538 if(trackTable[i]>sequenceLength-1){ 1745
1539 w = trackTable[i]-sequenceLength+1; 1746 sequence_sum(qpPtr, numVectors, sequenceLength);
1540 pn = sMeanL2+i; 1747 sequence_average(qpPtr, numVectors, sequenceLength);
1541 *pn=0; 1748 }
1542 while(w--)
1543 if(*ps>0)
1544 *pn+=*ps++;
1545 *pn/=trackTable[i]-sequenceLength+1;
1546 SILENCE_THRESH+=*pn;
1547 processedTracks++;
1548 }
1549 ps = sNorm + trackTable[i];
1550 }
1551 if(verbosity>1) {
1552 cerr << "processedTracks: " << processedTracks << endl;
1553 }
1554
1555 SILENCE_THRESH/=processedTracks;
1556 USE_THRESH=1; // Turn thresholding on
1557 DIFF_THRESH=SILENCE_THRESH; // mean shingle power
1558 SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE
1559 if(verbosity>4) {
1560 cerr << "silence thresh: " << SILENCE_THRESH;
1561 }
1562 w=sequenceLength-1;
1563 i=1;
1564 tmp1=*qnPtr;
1565 while(w--)
1566 *qnPtr+=qnPtr[i++];
1567 ps = qnPtr+1;
1568 w=numVectors-sequenceLength; // +1 -1
1569 while(w--){
1570 tmp2=*ps;
1571 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
1572 tmp1=tmp2;
1573 ps++;
1574 }
1575 ps = qnPtr;
1576 qMeanL2 = 0;
1577 w=numVectors-sequenceLength+1;
1578 while(w--){
1579 *ps=sqrt(*ps);
1580 qMeanL2+=*ps++;
1581 }
1582 qMeanL2 /= numVectors-sequenceLength+1;
1583 1749
1584 if(verbosity>1) { 1750 if(verbosity>1) {
1585 cerr << "done." << endl; 1751 cerr << "done." << endl;
1586 } 1752 }
1587 1753
1660 error("queryPoint > numVectors-wL+1 in query"); 1826 error("queryPoint > numVectors-wL+1 in query");
1661 else{ 1827 else{
1662 if(verbosity>1) { 1828 if(verbosity>1) {
1663 cerr << "query point: " << queryPoint << endl; cerr.flush(); 1829 cerr << "query point: " << queryPoint << endl; cerr.flush();
1664 } 1830 }
1665 query=query+queryPoint*dbH->dim; 1831 query = query + queryPoint * dbH->dim;
1666 qnPtr=qnPtr+queryPoint; 1832 qnPtr = qnPtr + queryPoint;
1833 if (usingPower) {
1834 qpPtr = qpPtr + queryPoint;
1835 }
1667 numVectors=wL; 1836 numVectors=wL;
1668 } 1837 }
1669 1838
1670 double ** D = 0; // Differences query and target 1839 double ** D = 0; // Differences query and target
1671 double ** DD = 0; // Matched filter distance 1840 double ** DD = 0; // Matched filter distance
1674 assert(D); 1843 assert(D);
1675 DD = new double*[numVectors]; 1844 DD = new double*[numVectors];
1676 assert(DD); 1845 assert(DD);
1677 1846
1678 gettimeofday(&tv1, NULL); 1847 gettimeofday(&tv1, NULL);
1679 processedTracks=0; 1848 unsigned processedTracks = 0;
1680 unsigned successfulTracks=0; 1849 unsigned successfulTracks=0;
1681 1850
1682 double* qp; 1851 double* qp;
1683 double* sp; 1852 double* sp;
1684 double* dp; 1853 double* dp;
1792 1961
1793 // Search for minimum distance by shingles (concatenated vectors) 1962 // Search for minimum distance by shingles (concatenated vectors)
1794 for(j=0;j<=numVectors-wL;j+=HOP_SIZE) 1963 for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
1795 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ 1964 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
1796 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; 1965 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
1797 if(verbosity>10) { 1966 if(verbosity>9) {
1798 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl; 1967 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl;
1799 } 1968 }
1800 // Gather chi^2 statistics 1969 // Gather chi^2 statistics
1801 if(thisDist<minSample) 1970 if(thisDist<minSample)
1802 minSample=thisDist; 1971 minSample=thisDist;
1808 logSampleSum+=log(thisDist); 1977 logSampleSum+=log(thisDist);
1809 } 1978 }
1810 1979
1811 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); 1980 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]);
1812 // Power test 1981 // Power test
1813 if(!USE_THRESH || 1982 if (usingPower) {
1814 // Threshold on mean L2 of Q and S sequences 1983 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) {
1815 (USE_THRESH && qnPtr[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH && 1984 thisDist = 1000000.0;
1816 // Are both query and target windows above mean energy? 1985 }
1817 (qnPtr[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH ))) 1986 }
1818 thisDist=thisDist; // Computed above
1819 else
1820 thisDist=1000000.0;
1821 1987
1822 // k-NN match algorithm 1988 // k-NN match algorithm
1823 m=pointNN; 1989 m=pointNN;
1824 while(m--){ 1990 while(m--){
1825 if(thisDist<=distances[m]) 1991 if(thisDist<=distances[m])
1932 adbQueryResponse->result.Spos[k]=trackSIndexes[k]; 2098 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
1933 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); 2099 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
1934 } 2100 }
1935 } 2101 }
1936 2102
1937
1938 // Clean up 2103 // Clean up
1939 if(trackOffsetTable) 2104 if(trackOffsetTable)
1940 delete[] trackOffsetTable; 2105 delete[] trackOffsetTable;
1941 if(queryCopy) 2106 if(queryCopy)
1942 delete[] queryCopy; 2107 delete[] queryCopy;
1943 if(qNorm) 2108 if(qNorm)
1944 delete[] qNorm; 2109 delete[] qNorm;
1945 if(sNorm) 2110 if(sNorm)
1946 delete[] sNorm; 2111 delete[] sNorm;
1947 if(sMeanL2) 2112 if(qPower)
1948 delete[] sMeanL2; 2113 delete[] qPower;
2114 if(sPower)
2115 delete[] sPower;
1949 if(D) 2116 if(D)
1950 delete[] D; 2117 delete[] D;
1951 if(DD) 2118 if(DD)
1952 delete[] DD; 2119 delete[] DD;
1953 if(timesdata) 2120 if(timesdata)
1954 delete[] timesdata; 2121 delete[] timesdata;
1955 if(meanDBdur) 2122 if(meanDBdur)
1956 delete[] meanDBdur; 2123 delete[] meanDBdur;
1957
1958
1959 } 2124 }
1960 2125
1961 // Radius search between query and target tracks 2126 // Radius search between query and target tracks
1962 // efficient implementation based on matched filter 2127 // efficient implementation based on matched filter
1963 // assumes normed shingles 2128 // assumes normed shingles
1970 // we use stdout in this stub version 2135 // we use stdout in this stub version
1971 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); 2136 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
1972 double* query = (double*)(indata+sizeof(int)); 2137 double* query = (double*)(indata+sizeof(int));
1973 double* queryCopy = 0; 2138 double* queryCopy = 0;
1974 2139
1975 double qMeanL2;
1976 double* sMeanL2;
1977
1978 unsigned USE_THRESH=0;
1979 double SILENCE_THRESH=0;
1980 double DIFF_THRESH=0;
1981
1982 if(!(dbH->flags & O2_FLAG_L2NORM) ) 2140 if(!(dbH->flags & O2_FLAG_L2NORM) )
1983 error("Database must be L2 normed for sequence query","use -l2norm"); 2141 error("Database must be L2 normed for sequence query","use -l2norm");
1984 2142
1985 if(verbosity>1) { 2143 if(verbosity>1) {
1986 cerr << "performing norms ... "; cerr.flush(); 2144 cerr << "performing norms ... "; cerr.flush();
1990 // Make a copy of the query 2148 // Make a copy of the query
1991 queryCopy = new double[numVectors*dbH->dim]; 2149 queryCopy = new double[numVectors*dbH->dim];
1992 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); 2150 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
1993 qNorm = new double[numVectors]; 2151 qNorm = new double[numVectors];
1994 sNorm = new double[dbVectors]; 2152 sNorm = new double[dbVectors];
1995 sMeanL2=new double[dbH->numFiles]; 2153 assert(qNorm&&sNorm&&queryCopy&&sequenceLength);
1996 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength);
1997 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); 2154 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
1998 query = queryCopy; 2155 query = queryCopy;
1999 2156
2000 // Make norm measurements relative to sequenceLength 2157 // Make norm measurements relative to sequenceLength
2001 unsigned w = sequenceLength-1; 2158 unsigned w = sequenceLength-1;
2002 unsigned i,j; 2159 unsigned i,j;
2003 double* ps;
2004 double tmp1,tmp2;
2005 2160
2006 // Copy the L2 norm values to core to avoid disk random access later on 2161 // Copy the L2 norm values to core to avoid disk random access later on
2007 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); 2162 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
2008 double* snPtr = sNorm; 2163 double* snPtr = sNorm;
2009 double* qnPtr = qNorm; 2164 double* qnPtr = qNorm;
2165
2166 double *sPower = 0, *qPower = 0;
2167 double *spPtr = 0, *qpPtr = 0;
2168
2169 if (usingPower) {
2170 if(!(dbH->flags & O2_FLAG_POWER)) {
2171 error("database not power-enabled", dbName);
2172 }
2173 sPower = new double[dbVectors];
2174 spPtr = sPower;
2175 memcpy(sPower, powerTable, dbVectors * sizeof(double));
2176 }
2177
2010 for(i=0; i<dbH->numFiles; i++){ 2178 for(i=0; i<dbH->numFiles; i++){
2011 if(trackTable[i]>=sequenceLength){ 2179 if(trackTable[i]>=sequenceLength) {
2012 tmp1=*snPtr; 2180 sequence_sum(snPtr, trackTable[i], sequenceLength);
2013 j=1; 2181 sequence_sqrt(snPtr, trackTable[i], sequenceLength);
2014 w=sequenceLength-1; 2182 if (usingPower) {
2015 while(w--) 2183 sequence_sum(spPtr, trackTable[i], sequenceLength);
2016 *snPtr+=snPtr[j++]; 2184 sequence_average(spPtr, trackTable[i], sequenceLength);
2017 ps = snPtr+1; 2185 }
2018 w=trackTable[i]-sequenceLength; // +1 - 1 2186 }
2019 while(w--){ 2187 snPtr += trackTable[i];
2020 tmp2=*ps; 2188 if (usingPower) {
2021 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); 2189 spPtr += trackTable[i];
2022 tmp1=tmp2; 2190 }
2023 ps++; 2191 }
2024 } 2192
2025 ps = snPtr; 2193 sequence_sum(qnPtr, numVectors, sequenceLength);
2026 w=trackTable[i]-sequenceLength+1; 2194 sequence_sqrt(qnPtr, numVectors, sequenceLength);
2027 while(w--){ 2195
2028 *ps=sqrt(*ps); 2196 if (usingPower) {
2029 ps++; 2197 qPower = new double[numVectors];
2030 } 2198 qpPtr = qPower;
2031 } 2199 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
2032 snPtr+=trackTable[i]; 2200 error("error seeking to data", powerFileName, "lseek");
2033 } 2201 }
2034 2202 int count = read(powerfd, qPower, numVectors * sizeof(double));
2035 double* pn = sMeanL2; 2203 if (count == -1) {
2036 w=dbH->numFiles; 2204 error("error reading data", powerFileName, "read");
2037 while(w--) 2205 }
2038 *pn++=0.0; 2206 if ((unsigned) count != numVectors * sizeof(double)) {
2039 ps=sNorm; 2207 error("short read", powerFileName);
2040 unsigned processedTracks=0; 2208 }
2041 for(i=0; i<dbH->numFiles; i++){ 2209
2042 if(trackTable[i]>sequenceLength-1){ 2210 sequence_sum(qpPtr, numVectors, sequenceLength);
2043 w = trackTable[i]-sequenceLength+1; 2211 sequence_average(qpPtr, numVectors, sequenceLength);
2044 pn = sMeanL2+i; 2212 }
2045 *pn=0;
2046 while(w--)
2047 if(*ps>0)
2048 *pn+=*ps++;
2049 *pn/=trackTable[i]-sequenceLength+1;
2050 SILENCE_THRESH+=*pn;
2051 processedTracks++;
2052 }
2053 ps = sNorm + trackTable[i];
2054 }
2055 if(verbosity>1) {
2056 cerr << "processedTracks: " << processedTracks << endl;
2057 }
2058
2059 SILENCE_THRESH/=processedTracks;
2060 USE_THRESH=1; // Turn thresholding on
2061 DIFF_THRESH=SILENCE_THRESH; // mean shingle power
2062 SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE
2063 if(verbosity>4) {
2064 cerr << "silence thresh: " << SILENCE_THRESH;
2065 }
2066 w=sequenceLength-1;
2067 i=1;
2068 tmp1=*qnPtr;
2069 while(w--)
2070 *qnPtr+=qnPtr[i++];
2071 ps = qnPtr+1;
2072 w=numVectors-sequenceLength; // +1 -1
2073 while(w--){
2074 tmp2=*ps;
2075 *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
2076 tmp1=tmp2;
2077 ps++;
2078 }
2079 ps = qnPtr;
2080 qMeanL2 = 0;
2081 w=numVectors-sequenceLength+1;
2082 while(w--){
2083 *ps=sqrt(*ps);
2084 qMeanL2+=*ps++;
2085 }
2086 qMeanL2 /= numVectors-sequenceLength+1;
2087 2213
2088 if(verbosity>1) { 2214 if(verbosity>1) {
2089 cerr << "done." << endl; 2215 cerr << "done." << endl;
2090 } 2216 }
2091 2217
2164 error("queryPoint > numVectors-wL+1 in query"); 2290 error("queryPoint > numVectors-wL+1 in query");
2165 else{ 2291 else{
2166 if(verbosity>1) { 2292 if(verbosity>1) {
2167 cerr << "query point: " << queryPoint << endl; cerr.flush(); 2293 cerr << "query point: " << queryPoint << endl; cerr.flush();
2168 } 2294 }
2169 query=query+queryPoint*dbH->dim; 2295 query = query + queryPoint*dbH->dim;
2170 qnPtr=qnPtr+queryPoint; 2296 qnPtr = qnPtr + queryPoint;
2297 if (usingPower) {
2298 qpPtr = qpPtr + queryPoint;
2299 }
2171 numVectors=wL; 2300 numVectors=wL;
2172 } 2301 }
2173 2302
2174 double ** D = 0; // Differences query and target 2303 double ** D = 0; // Differences query and target
2175 double ** DD = 0; // Matched filter distance 2304 double ** DD = 0; // Matched filter distance
2178 assert(D); 2307 assert(D);
2179 DD = new double*[numVectors]; 2308 DD = new double*[numVectors];
2180 assert(DD); 2309 assert(DD);
2181 2310
2182 gettimeofday(&tv1, NULL); 2311 gettimeofday(&tv1, NULL);
2183 processedTracks=0; 2312 unsigned processedTracks = 0;
2184 unsigned successfulTracks=0; 2313 unsigned successfulTracks=0;
2185 2314
2186 double* qp; 2315 double* qp;
2187 double* sp; 2316 double* sp;
2188 double* dp; 2317 double* dp;
2296 2425
2297 // Search for minimum distance by shingles (concatenated vectors) 2426 // Search for minimum distance by shingles (concatenated vectors)
2298 for(j=0;j<=numVectors-wL;j+=HOP_SIZE) 2427 for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
2299 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ 2428 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
2300 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; 2429 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
2301 if(verbosity>10) { 2430 if(verbosity>9) {
2302 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl; 2431 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl;
2303 } 2432 }
2304 // Gather chi^2 statistics 2433 // Gather chi^2 statistics
2305 if(thisDist<minSample) 2434 if(thisDist<minSample)
2306 minSample=thisDist; 2435 minSample=thisDist;
2312 logSampleSum+=log(thisDist); 2441 logSampleSum+=log(thisDist);
2313 } 2442 }
2314 2443
2315 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); 2444 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]);
2316 // Power test 2445 // Power test
2317 if(!USE_THRESH || 2446 if (usingPower) {
2318 // Threshold on mean L2 of Q and S sequences 2447 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) {
2319 (USE_THRESH && qnPtr[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH && 2448 thisDist = 1000000.0;
2320 // Are both query and target windows above mean energy? 2449 }
2321 (qnPtr[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH ))) 2450 }
2322 thisDist=thisDist; // Computed above 2451
2323 else
2324 thisDist=1000000.0;
2325 if(thisDist>=0 && thisDist<=radius){ 2452 if(thisDist>=0 && thisDist<=radius){
2326 distances[0]++; // increment count 2453 distances[0]++; // increment count
2327 break; // only need one track point per query point 2454 break; // only need one track point per query point
2328 } 2455 }
2329 } 2456 }
2422 delete[] queryCopy; 2549 delete[] queryCopy;
2423 if(qNorm) 2550 if(qNorm)
2424 delete[] qNorm; 2551 delete[] qNorm;
2425 if(sNorm) 2552 if(sNorm)
2426 delete[] sNorm; 2553 delete[] sNorm;
2427 if(sMeanL2) 2554 if(qPower)
2428 delete[] sMeanL2; 2555 delete[] qPower;
2556 if(sPower)
2557 delete[] sPower;
2429 if(D) 2558 if(D)
2430 delete[] D; 2559 delete[] D;
2431 if(DD) 2560 if(DD)
2432 delete[] DD; 2561 delete[] DD;
2433 if(timesdata) 2562 if(timesdata)
2434 delete[] timesdata; 2563 delete[] timesdata;
2435 if(meanDBdur) 2564 if(meanDBdur)
2436 delete[] meanDBdur; 2565 delete[] meanDBdur;
2437
2438
2439 } 2566 }
2440 2567
2441 // Unit norm block of features 2568 // Unit norm block of features
2442 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){ 2569 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
2443 unsigned d; 2570 unsigned d;
2643 soap_receiver_fault(soap, err, ""); 2770 soap_receiver_fault(soap, err, "");
2644 return SOAP_FAULT; 2771 return SOAP_FAULT;
2645 } 2772 }
2646 } 2773 }
2647 2774
2775 int adb__sequenceQuery(struct soap* soap, xsd__string dbName, xsd__string qKey,
2776 adb__sequenceQueryParms *parms,
2777 adb__queryResponse &adbQueryResponse) {
2778
2779 char qPosStr[256];
2780 char pointNNStr[256];
2781 char trackNNStr[256];
2782 char seqLenStr[256];
2783 char relative_thresholdStr[256];
2784 char absolute_thresholdStr[256];
2785
2786 /* When the branch is merged, move this to a header and use it
2787 elsewhere */
2788 #define INTSTRINGIFY(val, str) \
2789 snprintf(str, 256, "%d", val);
2790 #define DOUBLESTRINGIFY(val, str) \
2791 snprintf(str, 256, "%f", val);
2792
2793 INTSTRINGIFY(parms->qPos, qPosStr);
2794 INTSTRINGIFY(parms->pointNN, pointNNStr);
2795 INTSTRINGIFY(parms->segNN, trackNNStr);
2796 /* FIXME: decide which of segLen and seqLen should live */
2797 INTSTRINGIFY(parms->segLen, seqLenStr);
2798
2799 DOUBLESTRINGIFY(parms->relative_threshold, relative_thresholdStr);
2800 DOUBLESTRINGIFY(parms->absolute_threshold, absolute_thresholdStr);
2801
2802 const char *argv[] = {
2803 "./audioDB",
2804 COM_QUERY,
2805 "sequence",
2806 COM_DATABASE,
2807 dbName,
2808 COM_FEATURES,
2809 qKey,
2810 COM_KEYLIST,
2811 /* FIXME: when this branch is merged, use ENSURE_STRING */
2812 parms->keyList==0?"":parms->keyList,
2813 COM_TIMES,
2814 parms->timesFileName==0?"":parms->timesFileName,
2815 COM_QUERYPOWER,
2816 parms->powerFileName==0?"":parms->powerFileName,
2817 COM_QPOINT,
2818 qPosStr,
2819 COM_POINTNN,
2820 pointNNStr,
2821 COM_TRACKNN,
2822 trackNNStr,
2823 COM_SEQLEN,
2824 seqLenStr,
2825 COM_RELATIVE_THRESH,
2826 relative_thresholdStr,
2827 COM_ABSOLUTE_THRESH,
2828 absolute_thresholdStr
2829 };
2830
2831 const unsigned argc = 25;
2832
2833 try {
2834 audioDB(argc, (char* const*)argv, &adbQueryResponse);
2835 return SOAP_OK;
2836 } catch (char *err) {
2837 soap_receiver_fault(soap, err, "");
2838 return SOAP_FAULT;
2839 }
2840 }
2841
2648 int main(const unsigned argc, char* const argv[]){ 2842 int main(const unsigned argc, char* const argv[]){
2649 audioDB(argc, argv); 2843 audioDB(argc, argv);
2650 } 2844 }
2651