comparison audioDB.cpp @ 204:2ea1908707c7 refactoring

Filewise refactor. Break apart huge monolithic audioDB.cpp file into seven broadly independent portions: * SOAP * DB creation * insertion * query * dump * common functionality * constructor functions Remove the "using namespace std" from the header file, though that wasn't actually a problem: the problem in question is solved by including adb.nsmap in only soap.cpp. Makefile improvements.
author mas01cr
date Wed, 28 Nov 2007 15:10:28 +0000
parents 7c9feaceeab5
children 9c3396bab02e
comparison
equal deleted inserted replaced
203:4b05c5bbf06d 204:2ea1908707c7
1 #include "audioDB.h" 1 #include "audioDB.h"
2
3 #if defined(O2_DEBUG)
4 void sigterm_action(int signal, siginfo_t *info, void *context) {
5 exit(128+signal);
6 }
7
8 void sighup_action(int signal, siginfo_t *info, void *context) {
9 // FIXME: reread any configuration files
10 }
11 #endif
12
13 void audioDB::error(const char* a, const char* b, const char *sysFunc) {
14 if(isServer) {
15 /* FIXME: I think this is leaky -- we never delete err. actually
16 deleting it is tricky, though; it gets placed into some
17 soap-internal struct with uncertain extent... -- CSR,
18 2007-10-01 */
19 char *err = new char[256]; /* FIXME: overflows */
20 snprintf(err, 255, "%s: %s\n%s", a, b, sysFunc ? strerror(errno) : "");
21 /* FIXME: actually we could usefully do with a properly structured
22 type, so that we can throw separate faultstring and details.
23 -- CSR, 2007-10-01 */
24 throw(err);
25 } else {
26 cerr << a << ": " << b << endl;
27 if (sysFunc) {
28 perror(sysFunc);
29 }
30 exit(1);
31 }
32 }
33 2
34 audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS 3 audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS
35 { 4 {
36 if(processArgs(argc, argv)<0){ 5 if(processArgs(argc, argv)<0){
37 printf("No command found.\n"); 6 printf("No command found.\n");
159 } 128 }
160 129
161 if(args_info.verbosity_given){ 130 if(args_info.verbosity_given){
162 verbosity=args_info.verbosity_arg; 131 verbosity=args_info.verbosity_arg;
163 if(verbosity<0 || verbosity>10){ 132 if(verbosity<0 || verbosity>10){
164 cerr << "Warning: verbosity out of range, setting to 1" << endl; 133 std::cerr << "Warning: verbosity out of range, setting to 1" << std::endl;
165 verbosity=1; 134 verbosity=1;
166 } 135 }
167 } 136 }
168 137
169 if(args_info.size_given) { 138 if(args_info.size_given) {
178 if(radius<=0 || radius>1000000000){ 147 if(radius<=0 || radius>1000000000){
179 error("radius out of range"); 148 error("radius out of range");
180 } 149 }
181 else 150 else
182 if(verbosity>3) { 151 if(verbosity>3) {
183 cerr << "Setting radius to " << radius << endl; 152 std::cerr << "Setting radius to " << radius << std::endl;
184 } 153 }
185 } 154 }
186 155
187 if(args_info.SERVER_given){ 156 if(args_info.SERVER_given){
188 command=COM_SERVER; 157 command=COM_SERVER;
247 if(args_info.key_given) 216 if(args_info.key_given)
248 key=args_info.key_arg; 217 key=args_info.key_arg;
249 if(args_info.times_given){ 218 if(args_info.times_given){
250 timesFileName=args_info.times_arg; 219 timesFileName=args_info.times_arg;
251 if(strlen(timesFileName)>0){ 220 if(strlen(timesFileName)>0){
252 if(!(timesFile = new ifstream(timesFileName,ios::in))) 221 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
253 error("Could not open times file for reading", timesFileName); 222 error("Could not open times file for reading", timesFileName);
254 usingTimes=1; 223 usingTimes=1;
255 } 224 }
256 } 225 }
257 if (args_info.power_given) { 226 if (args_info.power_given) {
274 key=args_info.keyList_arg; // INCONSISTENT NO CHECK 243 key=args_info.keyList_arg; // INCONSISTENT NO CHECK
275 244
276 /* TO DO: REPLACE WITH 245 /* TO DO: REPLACE WITH
277 if(args_info.keyList_given){ 246 if(args_info.keyList_given){
278 trackFileName=args_info.keyList_arg; 247 trackFileName=args_info.keyList_arg;
279 if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) 248 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in)))
280 error("Could not open keyList file for reading",trackFileName); 249 error("Could not open keyList file for reading",trackFileName);
281 } 250 }
282 AND UPDATE BATCHINSERT() 251 AND UPDATE BATCHINSERT()
283 */ 252 */
284 253
285 if(args_info.timesList_given){ 254 if(args_info.timesList_given){
286 timesFileName=args_info.timesList_arg; 255 timesFileName=args_info.timesList_arg;
287 if(strlen(timesFileName)>0){ 256 if(strlen(timesFileName)>0){
288 if(!(timesFile = new ifstream(timesFileName,ios::in))) 257 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
289 error("Could not open timesList file for reading", timesFileName); 258 error("Could not open timesList file for reading", timesFileName);
290 usingTimes=1; 259 usingTimes=1;
291 } 260 }
292 } 261 }
293 if(args_info.powerList_given){ 262 if(args_info.powerList_given){
294 powerFileName=args_info.powerList_arg; 263 powerFileName=args_info.powerList_arg;
295 if(strlen(powerFileName)>0){ 264 if(strlen(powerFileName)>0){
296 if(!(powerFile = new ifstream(powerFileName,ios::in))) 265 if(!(powerFile = new std::ifstream(powerFileName,std::ios::in)))
297 error("Could not open powerList file for reading", powerFileName); 266 error("Could not open powerList file for reading", powerFileName);
298 usingPower=1; 267 usingPower=1;
299 } 268 }
300 } 269 }
301 return 0; 270 return 0;
307 dbName=args_info.database_arg; 276 dbName=args_info.database_arg;
308 inFile=args_info.features_arg; 277 inFile=args_info.features_arg;
309 278
310 if(args_info.keyList_given){ 279 if(args_info.keyList_given){
311 trackFileName=args_info.keyList_arg; 280 trackFileName=args_info.keyList_arg;
312 if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) 281 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in)))
313 error("Could not open keyList file for reading",trackFileName); 282 error("Could not open keyList file for reading",trackFileName);
314 } 283 }
315 284
316 if(args_info.times_given){ 285 if(args_info.times_given){
317 timesFileName=args_info.times_arg; 286 timesFileName=args_info.times_arg;
318 if(strlen(timesFileName)>0){ 287 if(strlen(timesFileName)>0){
319 if(!(timesFile = new ifstream(timesFileName,ios::in))) 288 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
320 error("Could not open times file for reading", timesFileName); 289 error("Could not open times file for reading", timesFileName);
321 usingTimes=1; 290 usingTimes=1;
322 } 291 }
323 } 292 }
324 293
380 return 0; 349 return 0;
381 } 350 }
382 return -1; // no command found 351 return -1; // no command found
383 } 352 }
384 353
385 void audioDB::get_lock(int fd, bool exclusive) {
386 struct flock lock;
387 int status;
388
389 lock.l_type = exclusive ? F_WRLCK : F_RDLCK;
390 lock.l_whence = SEEK_SET;
391 lock.l_start = 0;
392 lock.l_len = 0; /* "the whole file" */
393
394 retry:
395 do {
396 status = fcntl(fd, F_SETLKW, &lock);
397 } while (status != 0 && errno == EINTR);
398
399 if (status) {
400 if (errno == EAGAIN) {
401 sleep(1);
402 goto retry;
403 } else {
404 error("fcntl lock error", "", "fcntl");
405 }
406 }
407 }
408
409 void audioDB::release_lock(int fd) {
410 struct flock lock;
411 int status;
412
413 lock.l_type = F_UNLCK;
414 lock.l_whence = SEEK_SET;
415 lock.l_start = 0;
416 lock.l_len = 0;
417
418 status = fcntl(fd, F_SETLKW, &lock);
419
420 if (status)
421 error("fcntl unlock error", "", "fcntl");
422 }
423
424 /* Make a new database.
425
426 The database consists of:
427
428 * a header (see dbTableHeader struct definition);
429 * keyTable: list of keys of tracks;
430 * trackTable: Maps implicit feature index to a feature vector
431 matrix (sizes of tracks)
432 * featureTable: Lots of doubles;
433 * timesTable: (start,end) time points for each feature vector;
434 * powerTable: associated power for each feature vector;
435 * l2normTable: squared l2norms for each feature vector.
436 */
437 void audioDB::create(const char* dbName){
438 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0)
439 error("Can't create database file", dbName, "open");
440 get_lock(dbfid, 1);
441
442 if(verbosity) {
443 cerr << "header size:" << O2_HEADERSIZE << endl;
444 }
445
446 dbH = new dbTableHeaderT();
447 assert(dbH);
448
449 unsigned int maxfiles = (unsigned int) rint((double) O2_MAXFILES * (double) size / (double) O2_DEFAULTDBSIZE);
450
451 // Initialize header
452 dbH->magic = O2_MAGIC;
453 dbH->version = O2_FORMAT_VERSION;
454 dbH->numFiles = 0;
455 dbH->dim = 0;
456 dbH->flags = 0;
457 dbH->length = 0;
458 dbH->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE);
459 dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles);
460 dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles);
461 dbH->l2normTableOffset = ALIGN_PAGE_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double));
462 dbH->powerTableOffset = ALIGN_PAGE_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double));
463 dbH->timesTableOffset = ALIGN_PAGE_DOWN(dbH->powerTableOffset - 2*maxfiles*O2_MEANNUMVECTORS*sizeof(double));
464 dbH->dbSize = size;
465
466 write(dbfid, dbH, O2_HEADERSIZE);
467
468 // go to the location corresponding to the last byte
469 if (lseek (dbfid, size - 1, SEEK_SET) == -1)
470 error("lseek error in db file", "", "lseek");
471
472 // write a dummy byte at the last location
473 if (write (dbfid, "", 1) != 1)
474 error("write error", "", "write");
475
476 if(verbosity) {
477 cerr << COM_CREATE << " " << dbName << endl;
478 }
479 }
480
481 void audioDB::drop(){
482 // FIXME: drop something? Should we even allow this?
483 }
484
485 void audioDB::initDBHeader(const char* dbName) {
486 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) {
487 error("Can't open database file", dbName, "open");
488 }
489
490 get_lock(dbfid, forWrite);
491 // Get the database header info
492 dbH = new dbTableHeaderT();
493 assert(dbH);
494
495 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) {
496 error("error reading db header", dbName, "read");
497 }
498
499 if(dbH->magic == O2_OLD_MAGIC) {
500 // FIXME: if anyone ever complains, write the program to convert
501 // from the old audioDB format to the new...
502 error("database file has old O2 header", dbName);
503 }
504
505 if(dbH->magic != O2_MAGIC) {
506 cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << endl;
507 error("database file has incorrect header", dbName);
508 }
509
510 if(dbH->version != O2_FORMAT_VERSION) {
511 error("database file has incorect version", dbName);
512 }
513
514 #define CHECKED_MMAP(type, var, start, length) \
515 { void *tmp = mmap(0, length, (PROT_READ | (forWrite ? PROT_WRITE : 0)), MAP_SHARED, dbfid, (start)); \
516 if(tmp == (void *) -1) { \
517 error("mmap error for db table", #var, "mmap"); \
518 } \
519 var = (type) tmp; \
520 }
521
522 CHECKED_MMAP(char *, db, 0, getpagesize());
523
524 // Make some handy tables with correct types
525 if(forWrite || (dbH->length > 0)) {
526 if(forWrite) {
527 fileTableLength = dbH->trackTableOffset - dbH->fileTableOffset;
528 trackTableLength = dbH->dataOffset - dbH->trackTableOffset;
529 dataBufLength = dbH->timesTableOffset - dbH->dataOffset;
530 timesTableLength = dbH->powerTableOffset - dbH->timesTableOffset;
531 powerTableLength = dbH->l2normTableOffset - dbH->powerTableOffset;
532 l2normTableLength = dbH->dbSize - dbH->l2normTableOffset;
533 } else {
534 fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLESIZE);
535 trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLESIZE);
536 dataBufLength = ALIGN_PAGE_UP(dbH->length);
537 timesTableLength = ALIGN_PAGE_UP(2*(dbH->length / dbH->dim));
538 powerTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim);
539 l2normTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim);
540 }
541 CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, fileTableLength);
542 CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, trackTableLength);
543 /*
544 * No more mmap() for dataBuf
545 *
546 * FIXME: Actually we do do the mmap() in the two cases where it's
547 * still "needed": in pointQuery and in l2norm if dbH->length is
548 * non-zero. Removing those cases too (and deleting the dataBuf
549 * variable completely) would be cool. -- CSR, 2007-11-19
550 *
551 * CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
552 */
553 CHECKED_MMAP(double *, timesTable, dbH->timesTableOffset, timesTableLength);
554 CHECKED_MMAP(double *, powerTable, dbH->powerTableOffset, powerTableLength);
555 CHECKED_MMAP(double *, l2normTable, dbH->l2normTableOffset, l2normTableLength);
556 }
557 }
558
559 void audioDB::initInputFile (const char *inFile) {
560 if (inFile) {
561 if ((infid = open(inFile, O_RDONLY)) < 0) {
562 error("can't open input file for reading", inFile, "open");
563 }
564
565 if (fstat(infid, &statbuf) < 0) {
566 error("fstat error finding size of input", inFile, "fstat");
567 }
568
569 if(dbH->dim == 0 && dbH->length == 0) { // empty database
570 // initialize with input dimensionality
571 if(read(infid, &dbH->dim, sizeof(unsigned)) != sizeof(unsigned)) {
572 error("short read of input file", inFile);
573 }
574 if(dbH->dim == 0) {
575 error("dimensionality of zero in input file", inFile);
576 }
577 } else {
578 unsigned test;
579 if(read(infid, &test, sizeof(unsigned)) != sizeof(unsigned)) {
580 error("short read of input file", inFile);
581 }
582 if(dbH->dim == 0) {
583 error("dimensionality of zero in input file", inFile);
584 }
585 if(dbH->dim != test) {
586 cerr << "error: expected dimension: " << dbH->dim << ", got : " << test <<endl;
587 error("feature dimensions do not match database table dimensions", inFile);
588 }
589 }
590
591 if ((indata = (char *) mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) == (caddr_t) -1) {
592 error("mmap error for input", inFile, "mmap");
593 }
594 }
595 }
596
597 void audioDB::initTables(const char* dbName, const char* inFile = 0) {
598 initDBHeader(dbName);
599 initInputFile(inFile);
600 }
601
602 bool audioDB::enough_data_space_free(off_t size) {
603 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
604 }
605
606 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
607 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET);
608 write(dbfid, buffer, size);
609 }
610
611 void audioDB::insert(const char* dbName, const char* inFile) {
612 forWrite = true;
613 initTables(dbName, inFile);
614
615 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
616 error("Must use timestamps with timestamped database","use --times");
617
618 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
619 error("Must use power with power-enabled database", dbName);
620
621 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
622 error("Insert failed: no more room in database", inFile);
623 }
624
625 if(!key)
626 key=inFile;
627 // Linear scan of filenames check for pre-existing feature
628 unsigned alreadyInserted=0;
629 for(unsigned k=0; k<dbH->numFiles; k++)
630 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key)+1)==0){
631 alreadyInserted=1;
632 break;
633 }
634
635 if(alreadyInserted){
636 if(verbosity) {
637 cerr << "Warning: key already exists in database, ignoring: " <<inFile << endl;
638 }
639 return;
640 }
641
642 // Make a track index table of features to file indexes
643 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
644 if(!numVectors){
645 if(verbosity) {
646 cerr << "Warning: ignoring zero-length feature vector file:" << key << endl;
647 }
648 // CLEAN UP
649 munmap(indata,statbuf.st_size);
650 munmap(db,dbH->dbSize);
651 close(infid);
652 return;
653 }
654
655 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));
656
657 off_t insertoffset = dbH->length;// Store current state
658
659 // Check times status and insert times from file
660 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
661 double *timesdata = timesTable + 2*indexoffset;
662
663 if(2*(indexoffset + numVectors) > timesTableLength) {
664 error("out of space for times", key);
665 }
666
667 if (usingTimes) {
668 insertTimeStamps(numVectors, timesFile, timesdata);
669 }
670
671 double *powerdata = powerTable + indexoffset;
672 insertPowerData(numVectors, powerfd, powerdata);
673
674 // Increment file count
675 dbH->numFiles++;
676
677 // Update Header information
678 dbH->length+=(statbuf.st_size-sizeof(int));
679
680 // Update track to file index map
681 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
682
683 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
684
685 // Norm the vectors on input if the database is already L2 normed
686 if(dbH->flags & O2_FLAG_L2NORM)
687 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
688
689 // Report status
690 status(dbName);
691 if(verbosity) {
692 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors "
693 << (statbuf.st_size-sizeof(int)) << " bytes." << endl;
694 }
695
696 // Copy the header back to the database
697 memcpy (db, dbH, sizeof(dbTableHeaderT));
698
699 // CLEAN UP
700 munmap(indata,statbuf.st_size);
701 close(infid);
702 }
703
704 void audioDB::insertTimeStamps(unsigned numVectors, ifstream *timesFile, double *timesdata) {
705 assert(usingTimes);
706
707 unsigned numtimes = 0;
708
709 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
710 dbH->flags=dbH->flags|O2_FLAG_TIMES;
711 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
712 error("Timestamp file used with non-timestamped database", timesFileName);
713 }
714
715 if(!timesFile->is_open()) {
716 error("problem opening times file on timestamped database", timesFileName);
717 }
718
719 double timepoint, next;
720 *timesFile >> timepoint;
721 if (timesFile->eof()) {
722 error("no entries in times file", timesFileName);
723 }
724 numtimes++;
725 do {
726 *timesFile >> next;
727 if (timesFile->eof()) {
728 break;
729 }
730 numtimes++;
731 timesdata[0] = timepoint;
732 timepoint = (timesdata[1] = next);
733 timesdata += 2;
734 } while (numtimes < numVectors + 1);
735
736 if (numtimes < numVectors + 1) {
737 error("too few timepoints in times file", timesFileName);
738 }
739
740 *timesFile >> next;
741 if (!timesFile->eof()) {
742 error("too many timepoints in times file", timesFileName);
743 }
744 }
745
746 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
747 if (usingPower) {
748 if (!(dbH->flags & O2_FLAG_POWER)) {
749 error("Cannot insert power data on non-power DB", dbName);
750 }
751
752 int one;
753 unsigned int count;
754
755 count = read(powerfd, &one, sizeof(unsigned int));
756 if (count != sizeof(unsigned int)) {
757 error("powerfd read failed", "int", "read");
758 }
759 if (one != 1) {
760 error("dimensionality of power file not 1", powerFileName);
761 }
762
763 // FIXME: should check that the powerfile is the right size for
764 // this. -- CSR, 2007-10-30
765 count = read(powerfd, powerdata, numVectors * sizeof(double));
766 if (count != numVectors * sizeof(double)) {
767 error("powerfd read failed", "double", "read");
768 }
769 }
770 }
771
772 void audioDB::batchinsert(const char* dbName, const char* inFile) {
773
774 forWrite = true;
775 initDBHeader(dbName);
776
777 if(!key)
778 key=inFile;
779 ifstream *filesIn = 0;
780 ifstream *keysIn = 0;
781 ifstream* thisTimesFile = 0;
782 int thispowerfd = 0;
783
784 if(!(filesIn = new ifstream(inFile)))
785 error("Could not open batch in file", inFile);
786 if(key && key!=inFile)
787 if(!(keysIn = new ifstream(key)))
788 error("Could not open batch key file",key);
789
790 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
791 error("Must use timestamps with timestamped database","use --times");
792
793 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
794 error("Must use power with power-enabled database", dbName);
795
796 unsigned totalVectors=0;
797 char *thisKey = new char[MAXSTR];
798 char *thisFile = new char[MAXSTR];
799 char *thisTimesFileName = new char[MAXSTR];
800 char *thisPowerFileName = new char[MAXSTR];
801
802 do{
803 filesIn->getline(thisFile,MAXSTR);
804 if(key && key!=inFile)
805 keysIn->getline(thisKey,MAXSTR);
806 else
807 thisKey = thisFile;
808 if(usingTimes)
809 timesFile->getline(thisTimesFileName,MAXSTR);
810 if(usingPower)
811 powerFile->getline(thisPowerFileName, MAXSTR);
812
813 if(filesIn->eof())
814 break;
815
816 initInputFile(thisFile);
817
818 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
819 error("batchinsert failed: no more room in database", thisFile);
820 }
821
822 // Linear scan of filenames check for pre-existing feature
823 unsigned alreadyInserted=0;
824
825 for(unsigned k=0; k<dbH->numFiles; k++)
826 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey)+1)==0){
827 alreadyInserted=1;
828 break;
829 }
830
831 if(alreadyInserted){
832 if(verbosity) {
833 cerr << "Warning: key already exists in database:" << thisKey << endl;
834 }
835 }
836 else{
837
838 // Make a track index table of features to file indexes
839 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
840 if(!numVectors){
841 if(verbosity) {
842 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl;
843 }
844 }
845 else{
846 if(usingTimes){
847 if(timesFile->eof()) {
848 error("not enough timestamp files in timesList", timesFileName);
849 }
850 thisTimesFile = new ifstream(thisTimesFileName,ios::in);
851 if(!thisTimesFile->is_open()) {
852 error("Cannot open timestamp file", thisTimesFileName);
853 }
854 off_t insertoffset = dbH->length;
855 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
856 double *timesdata = timesTable + 2*indexoffset;
857 if(2*(indexoffset + numVectors) > timesTableLength) {
858 error("out of space for times", key);
859 }
860 insertTimeStamps(numVectors, thisTimesFile, timesdata);
861 if(thisTimesFile)
862 delete thisTimesFile;
863 }
864
865 if (usingPower) {
866 if(powerFile->eof()) {
867 error("not enough power files in powerList", powerFileName);
868 }
869 thispowerfd = open(thisPowerFileName, O_RDONLY);
870 if (thispowerfd < 0) {
871 error("failed to open power file", thisPowerFileName);
872 }
873 unsigned insertoffset = dbH->length;
874 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
875 double *powerdata = powerTable + poweroffset;
876 insertPowerData(numVectors, thispowerfd, powerdata);
877 if (0 < thispowerfd) {
878 close(thispowerfd);
879 }
880 }
881 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));
882
883 off_t insertoffset = dbH->length;// Store current state
884
885 // Increment file count
886 dbH->numFiles++;
887
888 // Update Header information
889 dbH->length+=(statbuf.st_size-sizeof(int));
890
891 // Update track to file index map
892 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
893
894 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
895
896 // Norm the vectors on input if the database is already L2 normed
897 if(dbH->flags & O2_FLAG_L2NORM)
898 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
899
900 totalVectors+=numVectors;
901
902 // Copy the header back to the database
903 memcpy (db, dbH, sizeof(dbTableHeaderT));
904 }
905 }
906 // CLEAN UP
907 munmap(indata,statbuf.st_size);
908 close(infid);
909 }while(!filesIn->eof());
910
911 if(verbosity) {
912 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors "
913 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl;
914 }
915
916 // Report status
917 status(dbName);
918 }
919
920 // FIXME: this can't propagate the sequence length argument (used for
921 // dudCount). See adb__status() definition for the other half of
922 // this. -- CSR, 2007-10-01
923 void audioDB::ws_status(const char*dbName, char* hostport){
924 struct soap soap;
925 adb__statusResponse adbStatusResponse;
926
927 // Query an existing adb database
928 soap_init(&soap);
929 if(soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResponse)==SOAP_OK) {
930 cout << "numFiles = " << adbStatusResponse.result.numFiles << endl;
931 cout << "dim = " << adbStatusResponse.result.dim << endl;
932 cout << "length = " << adbStatusResponse.result.length << endl;
933 cout << "dudCount = " << adbStatusResponse.result.dudCount << endl;
934 cout << "nullCount = " << adbStatusResponse.result.nullCount << endl;
935 cout << "flags = " << adbStatusResponse.result.flags << endl;
936 } else {
937 soap_print_fault(&soap,stderr);
938 }
939
940 soap_destroy(&soap);
941 soap_end(&soap);
942 soap_done(&soap);
943 }
944
945 void audioDB::ws_query(const char*dbName, const char *trackKey, const char* hostport){
946 struct soap soap;
947 adb__queryResponse adbQueryResponse;
948
949 soap_init(&soap);
950 if(soap_call_adb__query(&soap,hostport,NULL,
951 (char*)dbName,(char*)trackKey,(char*)trackFileName,(char*)timesFileName,
952 queryType, queryPoint, pointNN, trackNN, sequenceLength, adbQueryResponse)==SOAP_OK){
953 //std::cerr << "result list length:" << adbQueryResponse.result.__sizeRlist << std::endl;
954 for(int i=0; i<adbQueryResponse.result.__sizeRlist; i++)
955 std::cout << adbQueryResponse.result.Rlist[i] << " " << adbQueryResponse.result.Dist[i]
956 << " " << adbQueryResponse.result.Qpos[i] << " " << adbQueryResponse.result.Spos[i] << std::endl;
957 }
958 else
959 soap_print_fault(&soap,stderr);
960
961 soap_destroy(&soap);
962 soap_end(&soap);
963 soap_done(&soap);
964
965 }
966
967
968 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){ 354 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){
969 if(!dbH) 355 if(!dbH)
970 initTables(dbName, 0); 356 initTables(dbName, 0);
971 357
972 unsigned dudCount=0; 358 unsigned dudCount=0;
980 } 366 }
981 367
982 if(adbStatusResponse == 0) { 368 if(adbStatusResponse == 0) {
983 369
984 // Update Header information 370 // Update Header information
985 cout << "num files:" << dbH->numFiles << endl; 371 std::cout << "num files:" << dbH->numFiles << std::endl;
986 cout << "data dim:" << dbH->dim <<endl; 372 std::cout << "data dim:" << dbH->dim <<std::endl;
987 if(dbH->dim>0){ 373 if(dbH->dim>0){
988 cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl; 374 std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<std::endl;
989 cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << endl; 375 std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl;
990 } 376 }
991 cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; 377 std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
992 cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << 378 std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" <<
993 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; 379 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
994 cout << "flags:" << dbH->flags << endl; 380 std::cout << "flags:" << dbH->flags << std::endl;
995 381
996 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; 382 std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl;
997 } else { 383 } else {
998 adbStatusResponse->result.numFiles = dbH->numFiles; 384 adbStatusResponse->result.numFiles = dbH->numFiles;
999 adbStatusResponse->result.dim = dbH->dim; 385 adbStatusResponse->result.dim = dbH->dim;
1000 adbStatusResponse->result.length = dbH->length; 386 adbStatusResponse->result.length = dbH->length;
1001 adbStatusResponse->result.dudCount = dudCount; 387 adbStatusResponse->result.dudCount = dudCount;
1002 adbStatusResponse->result.nullCount = nullCount; 388 adbStatusResponse->result.nullCount = nullCount;
1003 adbStatusResponse->result.flags = dbH->flags; 389 adbStatusResponse->result.flags = dbH->flags;
1004 } 390 }
1005 }
1006
1007 void audioDB::dump(const char* dbName){
1008 if(!dbH) {
1009 initTables(dbName, 0);
1010 }
1011
1012 if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
1013 error("error making output directory", output, "mkdir");
1014 }
1015
1016 char *cwd = new char[PATH_MAX];
1017
1018 if ((getcwd(cwd, PATH_MAX)) == 0) {
1019 error("error getting working directory", "", "getcwd");
1020 }
1021
1022 if((chdir(output)) < 0) {
1023 error("error changing working directory", output, "chdir");
1024 }
1025
1026 int fLfd, tLfd = 0, pLfd = 0, kLfd;
1027 FILE *fLFile, *tLFile = 0, *pLFile = 0, *kLFile;
1028
1029 if ((fLfd = open("featureList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
1030 error("error creating featureList file", "featureList.txt", "open");
1031 }
1032
1033 int times = dbH->flags & O2_FLAG_TIMES;
1034 if (times) {
1035 if ((tLfd = open("timesList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
1036 error("error creating timesList file", "timesList.txt", "open");
1037 }
1038 }
1039
1040 int power = dbH->flags & O2_FLAG_POWER;
1041 if (power) {
1042 if ((pLfd = open("powerList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
1043 error("error creating powerList file", "powerList.txt", "open");
1044 }
1045 }
1046
1047 if ((kLfd = open("keyList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
1048 error("error creating keyList file", "keyList.txt", "open");
1049 }
1050
1051 /* can these fail? I sincerely hope not. */
1052 fLFile = fdopen(fLfd, "w");
1053 if (times) {
1054 tLFile = fdopen(tLfd, "w");
1055 }
1056 if (power) {
1057 pLFile = fdopen(pLfd, "w");
1058 }
1059 kLFile = fdopen(kLfd, "w");
1060
1061 char *fName = new char[256];
1062 int ffd, pfd;
1063 FILE *tFile;
1064 unsigned pos = 0;
1065 lseek(dbfid, dbH->dataOffset, SEEK_SET);
1066 double *data_buffer;
1067 size_t data_buffer_size;
1068 for(unsigned k = 0; k < dbH->numFiles; k++) {
1069 fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLESIZE);
1070 snprintf(fName, 256, "%05d.features", k);
1071 if ((ffd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
1072 error("error creating feature file", fName, "open");
1073 }
1074 if ((write(ffd, &dbH->dim, sizeof(uint32_t))) < 0) {
1075 error("error writing dimensions", fName, "write");
1076 }
1077
1078 /* FIXME: this repeated malloc()/free() of data buffers is
1079 inefficient. */
1080 data_buffer_size = trackTable[k] * dbH->dim * sizeof(double);
1081
1082 {
1083 void *tmp = malloc(data_buffer_size);
1084 if (tmp == NULL) {
1085 error("error allocating data buffer");
1086 }
1087 data_buffer = (double *) tmp;
1088 }
1089
1090 if ((read(dbfid, data_buffer, data_buffer_size)) != (ssize_t) data_buffer_size) {
1091 error("error reading data", fName, "read");
1092 }
1093
1094 if ((write(ffd, data_buffer, data_buffer_size)) < 0) {
1095 error("error writing data", fName, "write");
1096 }
1097
1098 free(data_buffer);
1099
1100 fprintf(fLFile, "%s\n", fName);
1101 close(ffd);
1102
1103 if (times) {
1104 snprintf(fName, 256, "%05d.times", k);
1105 tFile = fopen(fName, "w");
1106 for(unsigned i = 0; i < trackTable[k]; i++) {
1107 // KLUDGE: specifying 16 digits of precision after the decimal
1108 // point is (but check this!) sufficient to uniquely identify
1109 // doubles; however, that will cause ugliness, as that's
1110 // vastly too many for most values of interest. Moving to %a
1111 // here and scanf() in the timesFile reading might fix this.
1112 // -- CSR, 2007-10-19
1113 fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*i));
1114 }
1115 fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*trackTable[k]-1));
1116
1117 fprintf(tLFile, "%s\n", fName);
1118 }
1119
1120 if (power) {
1121 uint32_t one = 1;
1122 snprintf(fName, 256, "%05d.power", k);
1123 if ((pfd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
1124 error("error creating power file", fName, "open");
1125 }
1126 if ((write(pfd, &one, sizeof(uint32_t))) < 0) {
1127 error("error writing one", fName, "write");
1128 }
1129 if ((write(pfd, powerTable + pos, trackTable[k] * sizeof(double))) < 0) {
1130 error("error writing data", fName, "write");
1131 }
1132 fprintf(pLFile, "%s\n", fName);
1133 close(pfd);
1134 }
1135
1136 pos += trackTable[k];
1137 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl;
1138 }
1139
1140 FILE *scriptFile;
1141 scriptFile = fopen("restore.sh", "w");
1142 fprintf(scriptFile, "\
1143 #! /bin/sh\n\
1144 #\n\
1145 # usage: AUDIODB=/path/to/audioDB sh ./restore.sh <newdb>\n\
1146 \n\
1147 if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n\
1148 if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\
1149 \"${AUDIODB}\" -d \"$1\" -N --size=%d\n", (int) (dbH->dbSize / 1000000));
1150 if(dbH->flags & O2_FLAG_L2NORM) {
1151 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n");
1152 }
1153 if(power) {
1154 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -P\n");
1155 }
1156 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt");
1157 if(times) {
1158 fprintf(scriptFile, " -T timesList.txt");
1159 }
1160 if(power) {
1161 fprintf(scriptFile, " -W powerList.txt");
1162 }
1163 fprintf(scriptFile, "\n");
1164 fclose(scriptFile);
1165
1166 if((chdir(cwd)) < 0) {
1167 error("error changing working directory", cwd, "chdir");
1168 }
1169
1170 fclose(fLFile);
1171 if(times) {
1172 fclose(tLFile);
1173 }
1174 if(power) {
1175 fclose(pLFile);
1176 }
1177 fclose(kLFile);
1178 delete[] fName;
1179
1180 status(dbName);
1181 } 391 }
1182 392
1183 void audioDB::l2norm(const char* dbName) { 393 void audioDB::l2norm(const char* dbName) {
1184 forWrite = true; 394 forWrite = true;
1185 initTables(dbName, 0); 395 initTables(dbName, 0);
1202 } 412 }
1203 dbH->flags |= O2_FLAG_POWER; 413 dbH->flags |= O2_FLAG_POWER;
1204 memcpy(db, dbH, O2_HEADERSIZE); 414 memcpy(db, dbH, O2_HEADERSIZE);
1205 } 415 }
1206 416
1207 bool audioDB::powers_acceptable(double p1, double p2) {
1208 if (use_absolute_threshold) {
1209 if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) {
1210 return false;
1211 }
1212 }
1213 if (use_relative_threshold) {
1214 if (fabs(p1-p2) > fabs(relative_threshold)) {
1215 return false;
1216 }
1217 }
1218 return true;
1219 }
1220
1221 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
1222 switch(queryType){
1223 case O2_POINT_QUERY:
1224 pointQuery(dbName, inFile, adbQueryResponse);
1225 break;
1226 case O2_SEQUENCE_QUERY:
1227 if(radius==0)
1228 trackSequenceQueryNN(dbName, inFile, adbQueryResponse);
1229 else
1230 trackSequenceQueryRad(dbName, inFile, adbQueryResponse);
1231 break;
1232 case O2_TRACK_QUERY:
1233 trackPointQuery(dbName, inFile, adbQueryResponse);
1234 break;
1235 default:
1236 error("unrecognized queryType in query()");
1237
1238 }
1239 }
1240
1241 //return ordinal position of key in keyTable
1242 unsigned audioDB::getKeyPos(char* key){
1243 for(unsigned k=0; k<dbH->numFiles; k++)
1244 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0)
1245 return k;
1246 error("Key not found",key);
1247 return O2_ERR_KEYNOTFOUND;
1248 }
1249
1250 // Basic point query engine
1251 void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
1252
1253 initTables(dbName, inFile);
1254
1255 // For each input vector, find the closest pointNN matching output vectors and report
1256 // we use stdout in this stub version
1257 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
1258
1259 double* query = (double*)(indata+sizeof(int));
1260 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
1261 double* data = dataBuf;
1262 double* queryCopy = 0;
1263
1264 if( dbH->flags & O2_FLAG_L2NORM ){
1265 // Make a copy of the query
1266 queryCopy = new double[numVectors*dbH->dim];
1267 qNorm = new double[numVectors];
1268 assert(queryCopy&&qNorm);
1269 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
1270 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
1271 query = queryCopy;
1272 }
1273
1274 // Make temporary dynamic memory for results
1275 assert(pointNN>0 && pointNN<=O2_MAXNN);
1276 double distances[pointNN];
1277 unsigned qIndexes[pointNN];
1278 unsigned sIndexes[pointNN];
1279 for(unsigned k=0; k<pointNN; k++){
1280 distances[k]=-DBL_MAX;
1281 qIndexes[k]=~0;
1282 sIndexes[k]=~0;
1283 }
1284
1285 unsigned j=numVectors;
1286 unsigned k,l,n;
1287 double thisDist;
1288
1289 unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double));
1290 double meanQdur = 0;
1291 double *timesdata = 0;
1292 double *querydurs = 0;
1293 double *dbdurs = 0;
1294
1295 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
1296 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
1297 usingTimes=0;
1298 }
1299
1300 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
1301 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
1302
1303 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
1304 timesdata = new double[2*numVectors];
1305 querydurs = new double[numVectors];
1306 insertTimeStamps(numVectors, timesFile, timesdata);
1307 // Calculate durations of points
1308 for(k=0; k<numVectors-1; k++){
1309 querydurs[k]=timesdata[2*k+1]-timesdata[2*k];
1310 meanQdur+=querydurs[k];
1311 }
1312 meanQdur/=k;
1313 // Individual exhaustive timepoint durations
1314 dbdurs = new double[totalVecs];
1315 for(k=0; k<totalVecs-1; k++) {
1316 dbdurs[k]=timesTable[2*k+1]-timesTable[2*k];
1317 }
1318 }
1319
1320 if(usingQueryPoint)
1321 if(queryPoint>numVectors-1)
1322 error("queryPoint > numVectors in query");
1323 else{
1324 if(verbosity>1) {
1325 cerr << "query point: " << queryPoint << endl; cerr.flush();
1326 }
1327 query=query+queryPoint*dbH->dim;
1328 numVectors=queryPoint+1;
1329 j=1;
1330 }
1331
1332 gettimeofday(&tv1, NULL);
1333 while(j--){ // query
1334 data=dataBuf;
1335 k=totalVecs; // number of database vectors
1336 while(k--){ // database
1337 thisDist=0;
1338 l=dbH->dim;
1339 double* q=query;
1340 while(l--)
1341 thisDist+=*q++**data++;
1342 if(!usingTimes ||
1343 (usingTimes
1344 && fabs(dbdurs[totalVecs-k-1]-querydurs[numVectors-j-1])<querydurs[numVectors-j-1]*timesTol)){
1345 n=pointNN;
1346 while(n--){
1347 if(thisDist>=distances[n]){
1348 if((n==0 || thisDist<=distances[n-1])){
1349 // Copy all values above up the queue
1350 for( l=pointNN-1 ; l >= n+1 ; l--){
1351 distances[l]=distances[l-1];
1352 qIndexes[l]=qIndexes[l-1];
1353 sIndexes[l]=sIndexes[l-1];
1354 }
1355 distances[n]=thisDist;
1356 qIndexes[n]=numVectors-j-1;
1357 sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1;
1358 break;
1359 }
1360 }
1361 else
1362 break;
1363 }
1364 }
1365 }
1366 // Move query pointer to next query point
1367 query+=dbH->dim;
1368 }
1369
1370 gettimeofday(&tv2, NULL);
1371 if(verbosity>1) {
1372 cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
1373 }
1374
1375 if(adbQueryResponse==0){
1376 // Output answer
1377 // Loop over nearest neighbours
1378 for(k=0; k < pointNN; k++){
1379 // Scan for key
1380 unsigned cumTrack=0;
1381 for(l=0 ; l<dbH->numFiles; l++){
1382 cumTrack+=trackTable[l];
1383 if(sIndexes[k]<cumTrack){
1384 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " "
1385 << sIndexes[k]+trackTable[l]-cumTrack << endl;
1386 break;
1387 }
1388 }
1389 }
1390 }
1391 else{ // Process Web Services Query
1392 int listLen;
1393 for(k = 0; k < pointNN; k++) {
1394 if(distances[k] == -DBL_MAX)
1395 break;
1396 }
1397 listLen = k;
1398
1399 adbQueryResponse->result.__sizeRlist=listLen;
1400 adbQueryResponse->result.__sizeDist=listLen;
1401 adbQueryResponse->result.__sizeQpos=listLen;
1402 adbQueryResponse->result.__sizeSpos=listLen;
1403 adbQueryResponse->result.Rlist= new char*[listLen];
1404 adbQueryResponse->result.Dist = new double[listLen];
1405 adbQueryResponse->result.Qpos = new unsigned int[listLen];
1406 adbQueryResponse->result.Spos = new unsigned int[listLen];
1407 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
1408 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
1409 adbQueryResponse->result.Dist[k]=distances[k];
1410 adbQueryResponse->result.Qpos[k]=qIndexes[k];
1411 unsigned cumTrack=0;
1412 for(l=0 ; l<dbH->numFiles; l++){
1413 cumTrack+=trackTable[l];
1414 if(sIndexes[k]<cumTrack){
1415 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE);
1416 break;
1417 }
1418 }
1419 adbQueryResponse->result.Spos[k]=sIndexes[k]+trackTable[l]-cumTrack;
1420 }
1421 }
1422
1423 // Clean up
1424 if(queryCopy)
1425 delete queryCopy;
1426 if(qNorm)
1427 delete qNorm;
1428 if(timesdata)
1429 delete[] timesdata;
1430 if(querydurs)
1431 delete[] querydurs;
1432 if(dbdurs)
1433 delete dbdurs;
1434 }
1435
1436 // trackPointQuery
1437 // return the trackNN closest tracks to the query track
1438 // uses average of pointNN points per track
1439 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
1440 initTables(dbName, inFile);
1441
1442 // For each input vector, find the closest pointNN matching output vectors and report
1443 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
1444 double* query = (double*)(indata+sizeof(int));
1445 double* data;
1446 double* queryCopy = 0;
1447
1448 if( dbH->flags & O2_FLAG_L2NORM ){
1449 // Make a copy of the query
1450 queryCopy = new double[numVectors*dbH->dim];
1451 qNorm = new double[numVectors];
1452 assert(queryCopy&&qNorm);
1453 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
1454 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
1455 query = queryCopy;
1456 }
1457
1458 assert(pointNN>0 && pointNN<=O2_MAXNN);
1459 assert(trackNN>0 && trackNN<=O2_MAXNN);
1460
1461 // Make temporary dynamic memory for results
1462 double trackDistances[trackNN];
1463 unsigned trackIDs[trackNN];
1464 unsigned trackQIndexes[trackNN];
1465 unsigned trackSIndexes[trackNN];
1466
1467 double distances[pointNN];
1468 unsigned qIndexes[pointNN];
1469 unsigned sIndexes[pointNN];
1470
1471 unsigned j=numVectors; // number of query points
1472 unsigned k,l,n, track, trackOffset=0, processedTracks=0;
1473 double thisDist;
1474
1475 for(k=0; k<pointNN; k++){
1476 distances[k]=-DBL_MAX;
1477 qIndexes[k]=~0;
1478 sIndexes[k]=~0;
1479 }
1480
1481 for(k=0; k<trackNN; k++){
1482 trackDistances[k]=-DBL_MAX;
1483 trackQIndexes[k]=~0;
1484 trackSIndexes[k]=~0;
1485 trackIDs[k]=~0;
1486 }
1487
1488 double meanQdur = 0;
1489 double *timesdata = 0;
1490 double *querydurs = 0;
1491 double *meanDBdur = 0;
1492
1493 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
1494 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
1495 usingTimes=0;
1496 }
1497
1498 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
1499 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
1500
1501 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
1502 timesdata = new double[2*numVectors];
1503 querydurs = new double[numVectors];
1504 insertTimeStamps(numVectors, timesFile, timesdata);
1505 // Calculate durations of points
1506 for(k=0; k<numVectors-1; k++) {
1507 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
1508 meanQdur += querydurs[k];
1509 }
1510 meanQdur/=k;
1511 meanDBdur = new double[dbH->numFiles];
1512 for(k=0; k<dbH->numFiles; k++){
1513 meanDBdur[k]=0.0;
1514 for(j=0; j<trackTable[k]-1 ; j++) {
1515 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
1516 }
1517 meanDBdur[k]/=j;
1518 }
1519 }
1520
1521 if(usingQueryPoint)
1522 if(queryPoint>numVectors-1)
1523 error("queryPoint > numVectors in query");
1524 else{
1525 if(verbosity>1) {
1526 cerr << "query point: " << queryPoint << endl; cerr.flush();
1527 }
1528 query=query+queryPoint*dbH->dim;
1529 numVectors=queryPoint+1;
1530 }
1531
1532 // build track offset table
1533 off_t *trackOffsetTable = new off_t[dbH->numFiles];
1534 unsigned cumTrack=0;
1535 off_t trackIndexOffset;
1536 for(k=0; k<dbH->numFiles;k++){
1537 trackOffsetTable[k]=cumTrack;
1538 cumTrack+=trackTable[k]*dbH->dim;
1539 }
1540
1541 char nextKey[MAXSTR];
1542
1543 gettimeofday(&tv1, NULL);
1544
1545 size_t data_buffer_size = 0;
1546 double *data_buffer = 0;
1547 lseek(dbfid, dbH->dataOffset, SEEK_SET);
1548
1549 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
1550
1551 trackOffset = trackOffsetTable[track]; // numDoubles offset
1552
1553 // get trackID from file if using a control file
1554 if(trackFile) {
1555 trackFile->getline(nextKey,MAXSTR);
1556 if(!trackFile->eof()) {
1557 track = getKeyPos(nextKey);
1558 trackOffset = trackOffsetTable[track];
1559 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
1560 } else {
1561 break;
1562 }
1563 }
1564
1565 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
1566
1567 if(verbosity>7) {
1568 cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush();
1569 }
1570
1571 if(dbH->flags & O2_FLAG_L2NORM)
1572 usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy;
1573 else
1574 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int));
1575 if(usingQueryPoint)
1576 j=1;
1577 else
1578 j=numVectors;
1579
1580 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
1581 if(data_buffer) {
1582 free(data_buffer);
1583 }
1584 {
1585 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
1586 void *tmp = malloc(data_buffer_size);
1587 if (tmp == NULL) {
1588 error("error allocating data buffer");
1589 }
1590 data_buffer = (double *) tmp;
1591 }
1592 }
1593
1594 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
1595
1596 while(j--){
1597 k=trackTable[track]; // number of vectors in track
1598 data=data_buffer; // data for track
1599 while(k--){
1600 thisDist=0;
1601 l=dbH->dim;
1602 double* q=query;
1603 while(l--)
1604 thisDist+=*q++**data++;
1605 if(!usingTimes ||
1606 (usingTimes
1607 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
1608 n=pointNN;
1609 while(n--){
1610 if(thisDist>=distances[n]){
1611 if((n==0 || thisDist<=distances[n-1])){
1612 // Copy all values above up the queue
1613 for( l=pointNN-1 ; l > n ; l--){
1614 distances[l]=distances[l-1];
1615 qIndexes[l]=qIndexes[l-1];
1616 sIndexes[l]=sIndexes[l-1];
1617 }
1618 distances[n]=thisDist;
1619 qIndexes[n]=numVectors-j-1;
1620 sIndexes[n]=trackTable[track]-k-1;
1621 break;
1622 }
1623 }
1624 else
1625 break;
1626 }
1627 }
1628 } // track
1629 // Move query pointer to next query point
1630 query+=dbH->dim;
1631 } // query
1632 // Take the average of this track's distance
1633 // Test the track distances
1634 thisDist=0;
1635 for (n = 0; n < pointNN; n++) {
1636 if (distances[n] == -DBL_MAX) break;
1637 thisDist += distances[n];
1638 }
1639 thisDist /= n;
1640
1641 n=trackNN;
1642 while(n--){
1643 if(thisDist>=trackDistances[n]){
1644 if((n==0 || thisDist<=trackDistances[n-1])){
1645 // Copy all values above up the queue
1646 for( l=trackNN-1 ; l > n ; l--){
1647 trackDistances[l]=trackDistances[l-1];
1648 trackQIndexes[l]=trackQIndexes[l-1];
1649 trackSIndexes[l]=trackSIndexes[l-1];
1650 trackIDs[l]=trackIDs[l-1];
1651 }
1652 trackDistances[n]=thisDist;
1653 trackQIndexes[n]=qIndexes[0];
1654 trackSIndexes[n]=sIndexes[0];
1655 trackIDs[n]=track;
1656 break;
1657 }
1658 }
1659 else
1660 break;
1661 }
1662 for(unsigned k=0; k<pointNN; k++){
1663 distances[k]=-DBL_MAX;
1664 qIndexes[k]=~0;
1665 sIndexes[k]=~0;
1666 }
1667 } // tracks
1668
1669 free(data_buffer);
1670
1671 gettimeofday(&tv2, NULL);
1672
1673 if(verbosity>1) {
1674 cerr << endl << "processed tracks :" << processedTracks
1675 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
1676 }
1677
1678 if(adbQueryResponse==0){
1679 if(verbosity>1) {
1680 cerr<<endl;
1681 }
1682 // Output answer
1683 // Loop over nearest neighbours
1684 for(k=0; k < min(trackNN,processedTracks); k++)
1685 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE
1686 << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << endl;
1687 }
1688 else{ // Process Web Services Query
1689 int listLen = min(trackNN, processedTracks);
1690 adbQueryResponse->result.__sizeRlist=listLen;
1691 adbQueryResponse->result.__sizeDist=listLen;
1692 adbQueryResponse->result.__sizeQpos=listLen;
1693 adbQueryResponse->result.__sizeSpos=listLen;
1694 adbQueryResponse->result.Rlist= new char*[listLen];
1695 adbQueryResponse->result.Dist = new double[listLen];
1696 adbQueryResponse->result.Qpos = new unsigned int[listLen];
1697 adbQueryResponse->result.Spos = new unsigned int[listLen];
1698 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
1699 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
1700 adbQueryResponse->result.Dist[k]=trackDistances[k];
1701 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
1702 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
1703 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
1704 }
1705 }
1706
1707 // Clean up
1708 if(trackOffsetTable)
1709 delete trackOffsetTable;
1710 if(queryCopy)
1711 delete queryCopy;
1712 if(qNorm)
1713 delete qNorm;
1714 if(timesdata)
1715 delete[] timesdata;
1716 if(querydurs)
1717 delete[] querydurs;
1718 if(meanDBdur)
1719 delete meanDBdur;
1720 }
1721
1722 // This is a common pattern in sequence queries: what we are doing is
1723 // taking a window of length seqlen over a buffer of length length,
1724 // and placing the sum of the elements in that window in the first
1725 // element of the window: thus replacing all but the last seqlen
1726 // elements in the buffer the corresponding windowed sum.
1727 void audioDB::sequence_sum(double *buffer, int length, int seqlen) {
1728 double tmp1, tmp2, *ps;
1729 int j, w;
1730
1731 tmp1 = *buffer;
1732 j = 1;
1733 w = seqlen - 1;
1734 while(w--) {
1735 *buffer += buffer[j++];
1736 }
1737 ps = buffer + 1;
1738 w = length - seqlen; // +1 - 1
1739 while(w--) {
1740 tmp2 = *ps;
1741 *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1);
1742 tmp1 = tmp2;
1743 ps++;
1744 }
1745 }
1746
1747 void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) {
1748 int w = length - seqlen + 1;
1749 while(w--) {
1750 *buffer = sqrt(*buffer);
1751 buffer++;
1752 }
1753 }
1754
1755 void audioDB::sequence_average(double *buffer, int length, int seqlen) {
1756 int w = length - seqlen + 1;
1757 while(w--) {
1758 *buffer /= seqlen;
1759 buffer++;
1760 }
1761 }
1762
1763 // k nearest-neighbor (k-NN) search between query and target tracks
1764 // efficient implementation based on matched filter
1765 // assumes normed shingles
1766 // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track
1767 void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
1768
1769 initTables(dbName, inFile);
1770
1771 // For each input vector, find the closest pointNN matching output vectors and report
1772 // we use stdout in this stub version
1773 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
1774 double* query = (double*)(indata+sizeof(int));
1775 double* queryCopy = 0;
1776
1777 if(!(dbH->flags & O2_FLAG_L2NORM) )
1778 error("Database must be L2 normed for sequence query","use -L2NORM");
1779
1780 if(numVectors<sequenceLength)
1781 error("Query shorter than requested sequence length", "maybe use -l");
1782
1783 if(verbosity>1) {
1784 cerr << "performing norms ... "; cerr.flush();
1785 }
1786 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
1787
1788 // Make a copy of the query
1789 queryCopy = new double[numVectors*dbH->dim];
1790 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
1791 qNorm = new double[numVectors];
1792 sNorm = new double[dbVectors];
1793 assert(qNorm&&sNorm&&queryCopy&&sequenceLength);
1794 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
1795 query = queryCopy;
1796
1797 // Make norm measurements relative to sequenceLength
1798 unsigned w = sequenceLength-1;
1799 unsigned i,j;
1800
1801 // Copy the L2 norm values to core to avoid disk random access later on
1802 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
1803 double* qnPtr = qNorm;
1804 double* snPtr = sNorm;
1805
1806 double *sPower = 0, *qPower = 0;
1807 double *spPtr = 0, *qpPtr = 0;
1808
1809 if (usingPower) {
1810 if (!(dbH->flags & O2_FLAG_POWER)) {
1811 error("database not power-enabled", dbName);
1812 }
1813 sPower = new double[dbVectors];
1814 spPtr = sPower;
1815 memcpy(sPower, powerTable, dbVectors * sizeof(double));
1816 }
1817
1818 for(i=0; i<dbH->numFiles; i++){
1819 if(trackTable[i]>=sequenceLength) {
1820 sequence_sum(snPtr, trackTable[i], sequenceLength);
1821 sequence_sqrt(snPtr, trackTable[i], sequenceLength);
1822
1823 if (usingPower) {
1824 sequence_sum(spPtr, trackTable[i], sequenceLength);
1825 sequence_average(spPtr, trackTable[i], sequenceLength);
1826 }
1827 }
1828 snPtr += trackTable[i];
1829 if (usingPower) {
1830 spPtr += trackTable[i];
1831 }
1832 }
1833
1834 sequence_sum(qnPtr, numVectors, sequenceLength);
1835 sequence_sqrt(qnPtr, numVectors, sequenceLength);
1836
1837 if (usingPower) {
1838 qPower = new double[numVectors];
1839 qpPtr = qPower;
1840 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
1841 error("error seeking to data", powerFileName, "lseek");
1842 }
1843 int count = read(powerfd, qPower, numVectors * sizeof(double));
1844 if (count == -1) {
1845 error("error reading data", powerFileName, "read");
1846 }
1847 if ((unsigned) count != numVectors * sizeof(double)) {
1848 error("short read", powerFileName);
1849 }
1850
1851 sequence_sum(qpPtr, numVectors, sequenceLength);
1852 sequence_average(qpPtr, numVectors, sequenceLength);
1853 }
1854
1855 if(verbosity>1) {
1856 cerr << "done." << endl;
1857 }
1858
1859 if(verbosity>1) {
1860 cerr << "matching tracks..." << endl;
1861 }
1862
1863 assert(pointNN>0 && pointNN<=O2_MAXNN);
1864 assert(trackNN>0 && trackNN<=O2_MAXNN);
1865
1866 // Make temporary dynamic memory for results
1867 double trackDistances[trackNN];
1868 unsigned trackIDs[trackNN];
1869 unsigned trackQIndexes[trackNN];
1870 unsigned trackSIndexes[trackNN];
1871
1872 double distances[pointNN];
1873 unsigned qIndexes[pointNN];
1874 unsigned sIndexes[pointNN];
1875
1876
1877 unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
1878 double thisDist;
1879
1880 for(k=0; k<pointNN; k++){
1881 distances[k]=1.0e6;
1882 qIndexes[k]=~0;
1883 sIndexes[k]=~0;
1884 }
1885
1886 for(k=0; k<trackNN; k++){
1887 trackDistances[k]=1.0e6;
1888 trackQIndexes[k]=~0;
1889 trackSIndexes[k]=~0;
1890 trackIDs[k]=~0;
1891 }
1892
1893 // Timestamp and durations processing
1894 double meanQdur = 0;
1895 double *timesdata = 0;
1896 double *querydurs = 0;
1897 double *meanDBdur = 0;
1898
1899 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
1900 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
1901 usingTimes=0;
1902 }
1903
1904 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
1905 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
1906
1907 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
1908 timesdata = new double[2*numVectors];
1909 querydurs = new double[numVectors];
1910
1911 insertTimeStamps(numVectors, timesFile, timesdata);
1912 // Calculate durations of points
1913 for(k=0; k<numVectors-1; k++) {
1914 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
1915 meanQdur += querydurs[k];
1916 }
1917 meanQdur/=k;
1918 if(verbosity>1) {
1919 cerr << "mean query file duration: " << meanQdur << endl;
1920 }
1921 meanDBdur = new double[dbH->numFiles];
1922 assert(meanDBdur);
1923 for(k=0; k<dbH->numFiles; k++){
1924 meanDBdur[k]=0.0;
1925 for(j=0; j<trackTable[k]-1 ; j++) {
1926 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
1927 }
1928 meanDBdur[k]/=j;
1929 }
1930 }
1931
1932 if(usingQueryPoint)
1933 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
1934 error("queryPoint > numVectors-wL+1 in query");
1935 else{
1936 if(verbosity>1) {
1937 cerr << "query point: " << queryPoint << endl; cerr.flush();
1938 }
1939 query = query + queryPoint * dbH->dim;
1940 qnPtr = qnPtr + queryPoint;
1941 if (usingPower) {
1942 qpPtr = qpPtr + queryPoint;
1943 }
1944 numVectors=wL;
1945 }
1946
1947 double ** D = 0; // Differences query and target
1948 double ** DD = 0; // Matched filter distance
1949
1950 D = new double*[numVectors];
1951 assert(D);
1952 DD = new double*[numVectors];
1953 assert(DD);
1954
1955 gettimeofday(&tv1, NULL);
1956 unsigned processedTracks = 0;
1957 unsigned successfulTracks=0;
1958
1959 double* qp;
1960 double* sp;
1961 double* dp;
1962
1963 // build track offset table
1964 off_t *trackOffsetTable = new off_t[dbH->numFiles];
1965 unsigned cumTrack=0;
1966 off_t trackIndexOffset;
1967 for(k=0; k<dbH->numFiles;k++){
1968 trackOffsetTable[k]=cumTrack;
1969 cumTrack+=trackTable[k]*dbH->dim;
1970 }
1971
1972 char nextKey [MAXSTR];
1973
1974 // chi^2 statistics
1975 double sampleCount = 0;
1976 double sampleSum = 0;
1977 double logSampleSum = 0;
1978 double minSample = 1e9;
1979 double maxSample = 0;
1980
1981 // Track loop
1982 size_t data_buffer_size = 0;
1983 double *data_buffer = 0;
1984 lseek(dbfid, dbH->dataOffset, SEEK_SET);
1985
1986 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) {
1987
1988 trackOffset = trackOffsetTable[track]; // numDoubles offset
1989
1990 // get trackID from file if using a control file
1991 if(trackFile) {
1992 trackFile->getline(nextKey,MAXSTR);
1993 if(!trackFile->eof()) {
1994 track = getKeyPos(nextKey);
1995 trackOffset = trackOffsetTable[track];
1996 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
1997 } else {
1998 break;
1999 }
2000 }
2001
2002 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
2003
2004 if(sequenceLength<=trackTable[track]){ // test for short sequences
2005
2006 if(verbosity>7) {
2007 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();
2008 }
2009
2010 // Sum products matrix
2011 for(j=0; j<numVectors;j++){
2012 D[j]=new double[trackTable[track]];
2013 assert(D[j]);
2014
2015 }
2016
2017 // Matched filter matrix
2018 for(j=0; j<numVectors;j++){
2019 DD[j]=new double[trackTable[track]];
2020 assert(DD[j]);
2021 }
2022
2023 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
2024 if(data_buffer) {
2025 free(data_buffer);
2026 }
2027 {
2028 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
2029 void *tmp = malloc(data_buffer_size);
2030 if (tmp == NULL) {
2031 error("error allocating data buffer");
2032 }
2033 data_buffer = (double *) tmp;
2034 }
2035 }
2036
2037 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
2038
2039 // Dot product
2040 for(j=0; j<numVectors; j++)
2041 for(k=0; k<trackTable[track]; k++){
2042 qp=query+j*dbH->dim;
2043 sp=data_buffer+k*dbH->dim;
2044 DD[j][k]=0.0; // Initialize matched filter array
2045 dp=&D[j][k]; // point to correlation cell j,k
2046 *dp=0.0; // initialize correlation cell
2047 l=dbH->dim; // size of vectors
2048 while(l--)
2049 *dp+=*qp++**sp++;
2050 }
2051
2052 // Matched Filter
2053 // HOP SIZE == 1
2054 double* spd;
2055 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
2056 for(w=0; w<wL; w++)
2057 for(j=0; j<numVectors-w; j++){
2058 sp=DD[j];
2059 spd=D[j+w]+w;
2060 k=trackTable[track]-w;
2061 while(k--)
2062 *sp+++=*spd++;
2063 }
2064 }
2065
2066 else{ // HOP_SIZE != 1
2067 for(w=0; w<wL; w++)
2068 for(j=0; j<numVectors-w; j+=HOP_SIZE){
2069 sp=DD[j];
2070 spd=D[j+w]+w;
2071 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
2072 *sp+=*spd;
2073 sp+=HOP_SIZE;
2074 spd+=HOP_SIZE;
2075 }
2076 }
2077 }
2078
2079 if(verbosity>3 && usingTimes) {
2080 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl;
2081 cerr.flush();
2082 }
2083
2084 if(!usingTimes ||
2085 (usingTimes
2086 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
2087
2088 if(verbosity>3 && usingTimes) {
2089 cerr << "within duration tolerance." << endl;
2090 cerr.flush();
2091 }
2092
2093 // Search for minimum distance by shingles (concatenated vectors)
2094 for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
2095 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
2096 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
2097 if(verbosity>9) {
2098 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl;
2099 }
2100 // Gather chi^2 statistics
2101 if(thisDist<minSample)
2102 minSample=thisDist;
2103 else if(thisDist>maxSample)
2104 maxSample=thisDist;
2105 if(thisDist>1e-9){
2106 sampleCount++;
2107 sampleSum+=thisDist;
2108 logSampleSum+=log(thisDist);
2109 }
2110
2111 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]);
2112 // Power test
2113 if (usingPower) {
2114 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) {
2115 thisDist = 1000000.0;
2116 }
2117 }
2118
2119 // k-NN match algorithm
2120 m=pointNN;
2121 while(m--){
2122 if(thisDist<=distances[m])
2123 if(m==0 || thisDist>=distances[m-1]){
2124 // Shuffle distances up the list
2125 for(l=pointNN-1; l>m; l--){
2126 distances[l]=distances[l-1];
2127 qIndexes[l]=qIndexes[l-1];
2128 sIndexes[l]=sIndexes[l-1];
2129 }
2130 distances[m]=thisDist;
2131 if(usingQueryPoint)
2132 qIndexes[m]=queryPoint;
2133 else
2134 qIndexes[m]=j;
2135 sIndexes[m]=k;
2136 break;
2137 }
2138 }
2139 }
2140 // Calculate the mean of the N-Best matches
2141 thisDist=0.0;
2142 for(m=0; m<pointNN; m++) {
2143 if (distances[m] == 1000000.0) break;
2144 thisDist+=distances[m];
2145 }
2146 thisDist/=m;
2147
2148 // Let's see the distances then...
2149 if(verbosity>3) {
2150 cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl;
2151 }
2152
2153
2154 // All the track stuff goes here
2155 n=trackNN;
2156 while(n--){
2157 if(thisDist<=trackDistances[n]){
2158 if((n==0 || thisDist>=trackDistances[n-1])){
2159 // Copy all values above up the queue
2160 for( l=trackNN-1 ; l > n ; l--){
2161 trackDistances[l]=trackDistances[l-1];
2162 trackQIndexes[l]=trackQIndexes[l-1];
2163 trackSIndexes[l]=trackSIndexes[l-1];
2164 trackIDs[l]=trackIDs[l-1];
2165 }
2166 trackDistances[n]=thisDist;
2167 trackQIndexes[n]=qIndexes[0];
2168 trackSIndexes[n]=sIndexes[0];
2169 successfulTracks++;
2170 trackIDs[n]=track;
2171 break;
2172 }
2173 }
2174 else
2175 break;
2176 }
2177 } // Duration match
2178
2179 // Clean up current track
2180 if(D!=NULL){
2181 for(j=0; j<numVectors; j++)
2182 delete[] D[j];
2183 }
2184
2185 if(DD!=NULL){
2186 for(j=0; j<numVectors; j++)
2187 delete[] DD[j];
2188 }
2189 }
2190 // per-track reset array values
2191 for(unsigned k=0; k<pointNN; k++){
2192 distances[k]=1.0e6;
2193 qIndexes[k]=~0;
2194 sIndexes[k]=~0;
2195 }
2196 }
2197
2198 free(data_buffer);
2199
2200 gettimeofday(&tv2,NULL);
2201 if(verbosity>1) {
2202 cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
2203 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
2204 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
2205 << " minSample: " << minSample << " maxSample: " << maxSample << endl;
2206 }
2207 if(adbQueryResponse==0){
2208 if(verbosity>1) {
2209 cerr<<endl;
2210 }
2211 // Output answer
2212 // Loop over nearest neighbours
2213 for(k=0; k < min(trackNN,successfulTracks); k++)
2214 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << " "
2215 << trackQIndexes[k] << " " << trackSIndexes[k] << endl;
2216 }
2217 else{ // Process Web Services Query
2218 int listLen = min(trackNN, processedTracks);
2219 adbQueryResponse->result.__sizeRlist=listLen;
2220 adbQueryResponse->result.__sizeDist=listLen;
2221 adbQueryResponse->result.__sizeQpos=listLen;
2222 adbQueryResponse->result.__sizeSpos=listLen;
2223 adbQueryResponse->result.Rlist= new char*[listLen];
2224 adbQueryResponse->result.Dist = new double[listLen];
2225 adbQueryResponse->result.Qpos = new unsigned int[listLen];
2226 adbQueryResponse->result.Spos = new unsigned int[listLen];
2227 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
2228 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
2229 adbQueryResponse->result.Dist[k]=trackDistances[k];
2230 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
2231 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
2232 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
2233 }
2234 }
2235
2236 // Clean up
2237 if(trackOffsetTable)
2238 delete[] trackOffsetTable;
2239 if(queryCopy)
2240 delete[] queryCopy;
2241 if(qNorm)
2242 delete[] qNorm;
2243 if(sNorm)
2244 delete[] sNorm;
2245 if(qPower)
2246 delete[] qPower;
2247 if(sPower)
2248 delete[] sPower;
2249 if(D)
2250 delete[] D;
2251 if(DD)
2252 delete[] DD;
2253 if(timesdata)
2254 delete[] timesdata;
2255 if(querydurs)
2256 delete[] querydurs;
2257 if(meanDBdur)
2258 delete[] meanDBdur;
2259 }
2260
2261 // Radius search between query and target tracks
2262 // efficient implementation based on matched filter
2263 // assumes normed shingles
2264 // outputs count of retrieved shingles, max retreived = one shingle per query shingle per track
2265 void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
2266
2267 initTables(dbName, inFile);
2268
2269 // For each input vector, find the closest pointNN matching output vectors and report
2270 // we use stdout in this stub version
2271 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
2272 double* query = (double*)(indata+sizeof(int));
2273 double* queryCopy = 0;
2274
2275 if(!(dbH->flags & O2_FLAG_L2NORM) )
2276 error("Database must be L2 normed for sequence query","use -l2norm");
2277
2278 if(verbosity>1) {
2279 cerr << "performing norms ... "; cerr.flush();
2280 }
2281 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
2282
2283 // Make a copy of the query
2284 queryCopy = new double[numVectors*dbH->dim];
2285 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
2286 qNorm = new double[numVectors];
2287 sNorm = new double[dbVectors];
2288 assert(qNorm&&sNorm&&queryCopy&&sequenceLength);
2289 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
2290 query = queryCopy;
2291
2292 // Make norm measurements relative to sequenceLength
2293 unsigned w = sequenceLength-1;
2294 unsigned i,j;
2295
2296 // Copy the L2 norm values to core to avoid disk random access later on
2297 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
2298 double* snPtr = sNorm;
2299 double* qnPtr = qNorm;
2300
2301 double *sPower = 0, *qPower = 0;
2302 double *spPtr = 0, *qpPtr = 0;
2303
2304 if (usingPower) {
2305 if(!(dbH->flags & O2_FLAG_POWER)) {
2306 error("database not power-enabled", dbName);
2307 }
2308 sPower = new double[dbVectors];
2309 spPtr = sPower;
2310 memcpy(sPower, powerTable, dbVectors * sizeof(double));
2311 }
2312
2313 for(i=0; i<dbH->numFiles; i++){
2314 if(trackTable[i]>=sequenceLength) {
2315 sequence_sum(snPtr, trackTable[i], sequenceLength);
2316 sequence_sqrt(snPtr, trackTable[i], sequenceLength);
2317 if (usingPower) {
2318 sequence_sum(spPtr, trackTable[i], sequenceLength);
2319 sequence_average(spPtr, trackTable[i], sequenceLength);
2320 }
2321 }
2322 snPtr += trackTable[i];
2323 if (usingPower) {
2324 spPtr += trackTable[i];
2325 }
2326 }
2327
2328 sequence_sum(qnPtr, numVectors, sequenceLength);
2329 sequence_sqrt(qnPtr, numVectors, sequenceLength);
2330
2331 if (usingPower) {
2332 qPower = new double[numVectors];
2333 qpPtr = qPower;
2334 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
2335 error("error seeking to data", powerFileName, "lseek");
2336 }
2337 int count = read(powerfd, qPower, numVectors * sizeof(double));
2338 if (count == -1) {
2339 error("error reading data", powerFileName, "read");
2340 }
2341 if ((unsigned) count != numVectors * sizeof(double)) {
2342 error("short read", powerFileName);
2343 }
2344
2345 sequence_sum(qpPtr, numVectors, sequenceLength);
2346 sequence_average(qpPtr, numVectors, sequenceLength);
2347 }
2348
2349 if(verbosity>1) {
2350 cerr << "done." << endl;
2351 }
2352
2353 if(verbosity>1) {
2354 cerr << "matching tracks..." << endl;
2355 }
2356
2357 assert(pointNN>0 && pointNN<=O2_MAXNN);
2358 assert(trackNN>0 && trackNN<=O2_MAXNN);
2359
2360 // Make temporary dynamic memory for results
2361 double trackDistances[trackNN];
2362 unsigned trackIDs[trackNN];
2363 unsigned trackQIndexes[trackNN];
2364 unsigned trackSIndexes[trackNN];
2365
2366 double distances[pointNN];
2367 unsigned qIndexes[pointNN];
2368 unsigned sIndexes[pointNN];
2369
2370
2371 unsigned k,l,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
2372 double thisDist;
2373
2374 for(k=0; k<pointNN; k++){
2375 distances[k]=0.0;
2376 qIndexes[k]=~0;
2377 sIndexes[k]=~0;
2378 }
2379
2380 for(k=0; k<trackNN; k++){
2381 trackDistances[k]=0.0;
2382 trackQIndexes[k]=~0;
2383 trackSIndexes[k]=~0;
2384 trackIDs[k]=~0;
2385 }
2386
2387 // Timestamp and durations processing
2388 double meanQdur = 0;
2389 double *timesdata = 0;
2390 double *querydurs = 0;
2391 double *meanDBdur = 0;
2392
2393 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
2394 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
2395 usingTimes=0;
2396 }
2397
2398 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
2399 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
2400
2401 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
2402 timesdata = new double[2*numVectors];
2403 querydurs = new double[numVectors];
2404
2405 insertTimeStamps(numVectors, timesFile, timesdata);
2406 // Calculate durations of points
2407 for(k=0; k<numVectors-1; k++){
2408 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
2409 meanQdur += querydurs[k];
2410 }
2411 meanQdur/=k;
2412 if(verbosity>1) {
2413 cerr << "mean query file duration: " << meanQdur << endl;
2414 }
2415 meanDBdur = new double[dbH->numFiles];
2416 assert(meanDBdur);
2417 for(k=0; k<dbH->numFiles; k++){
2418 meanDBdur[k]=0.0;
2419 for(j=0; j<trackTable[k]-1 ; j++) {
2420 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
2421 }
2422 meanDBdur[k]/=j;
2423 }
2424 }
2425
2426 if(usingQueryPoint)
2427 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
2428 error("queryPoint > numVectors-wL+1 in query");
2429 else{
2430 if(verbosity>1) {
2431 cerr << "query point: " << queryPoint << endl; cerr.flush();
2432 }
2433 query = query + queryPoint*dbH->dim;
2434 qnPtr = qnPtr + queryPoint;
2435 if (usingPower) {
2436 qpPtr = qpPtr + queryPoint;
2437 }
2438 numVectors=wL;
2439 }
2440
2441 double ** D = 0; // Differences query and target
2442 double ** DD = 0; // Matched filter distance
2443
2444 D = new double*[numVectors];
2445 assert(D);
2446 DD = new double*[numVectors];
2447 assert(DD);
2448
2449 gettimeofday(&tv1, NULL);
2450 unsigned processedTracks = 0;
2451 unsigned successfulTracks=0;
2452
2453 double* qp;
2454 double* sp;
2455 double* dp;
2456
2457 // build track offset table
2458 off_t *trackOffsetTable = new off_t[dbH->numFiles];
2459 unsigned cumTrack=0;
2460 off_t trackIndexOffset;
2461 for(k=0; k<dbH->numFiles;k++){
2462 trackOffsetTable[k]=cumTrack;
2463 cumTrack+=trackTable[k]*dbH->dim;
2464 }
2465
2466 char nextKey [MAXSTR];
2467
2468 // chi^2 statistics
2469 double sampleCount = 0;
2470 double sampleSum = 0;
2471 double logSampleSum = 0;
2472 double minSample = 1e9;
2473 double maxSample = 0;
2474
2475 // Track loop
2476 size_t data_buffer_size = 0;
2477 double *data_buffer = 0;
2478 lseek(dbfid, dbH->dataOffset, SEEK_SET);
2479
2480 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
2481
2482 trackOffset = trackOffsetTable[track]; // numDoubles offset
2483
2484 // get trackID from file if using a control file
2485 if(trackFile) {
2486 trackFile->getline(nextKey,MAXSTR);
2487 if(!trackFile->eof()) {
2488 track = getKeyPos(nextKey);
2489 trackOffset = trackOffsetTable[track];
2490 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
2491 } else {
2492 break;
2493 }
2494 }
2495
2496 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
2497
2498 if(sequenceLength<=trackTable[track]){ // test for short sequences
2499
2500 if(verbosity>7) {
2501 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();
2502 }
2503
2504 // Sum products matrix
2505 for(j=0; j<numVectors;j++){
2506 D[j]=new double[trackTable[track]];
2507 assert(D[j]);
2508
2509 }
2510
2511 // Matched filter matrix
2512 for(j=0; j<numVectors;j++){
2513 DD[j]=new double[trackTable[track]];
2514 assert(DD[j]);
2515 }
2516
2517 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
2518 if(data_buffer) {
2519 free(data_buffer);
2520 }
2521 {
2522 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
2523 void *tmp = malloc(data_buffer_size);
2524 if (tmp == NULL) {
2525 error("error allocating data buffer");
2526 }
2527 data_buffer = (double *) tmp;
2528 }
2529 }
2530
2531 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
2532
2533 // Dot product
2534 for(j=0; j<numVectors; j++)
2535 for(k=0; k<trackTable[track]; k++){
2536 qp=query+j*dbH->dim;
2537 sp=data_buffer+k*dbH->dim;
2538 DD[j][k]=0.0; // Initialize matched filter array
2539 dp=&D[j][k]; // point to correlation cell j,k
2540 *dp=0.0; // initialize correlation cell
2541 l=dbH->dim; // size of vectors
2542 while(l--)
2543 *dp+=*qp++**sp++;
2544 }
2545
2546 // Matched Filter
2547 // HOP SIZE == 1
2548 double* spd;
2549 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
2550 for(w=0; w<wL; w++)
2551 for(j=0; j<numVectors-w; j++){
2552 sp=DD[j];
2553 spd=D[j+w]+w;
2554 k=trackTable[track]-w;
2555 while(k--)
2556 *sp+++=*spd++;
2557 }
2558 }
2559
2560 else{ // HOP_SIZE != 1
2561 for(w=0; w<wL; w++)
2562 for(j=0; j<numVectors-w; j+=HOP_SIZE){
2563 sp=DD[j];
2564 spd=D[j+w]+w;
2565 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
2566 *sp+=*spd;
2567 sp+=HOP_SIZE;
2568 spd+=HOP_SIZE;
2569 }
2570 }
2571 }
2572
2573 if(verbosity>3 && usingTimes) {
2574 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl;
2575 cerr.flush();
2576 }
2577
2578 if(!usingTimes ||
2579 (usingTimes
2580 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
2581
2582 if(verbosity>3 && usingTimes) {
2583 cerr << "within duration tolerance." << endl;
2584 cerr.flush();
2585 }
2586
2587 // Search for minimum distance by shingles (concatenated vectors)
2588 for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
2589 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
2590 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
2591 if(verbosity>9) {
2592 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl;
2593 }
2594 // Gather chi^2 statistics
2595 if(thisDist<minSample)
2596 minSample=thisDist;
2597 else if(thisDist>maxSample)
2598 maxSample=thisDist;
2599 if(thisDist>1e-9){
2600 sampleCount++;
2601 sampleSum+=thisDist;
2602 logSampleSum+=log(thisDist);
2603 }
2604
2605 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]);
2606 // Power test
2607 if (usingPower) {
2608 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) {
2609 thisDist = 1000000.0;
2610 }
2611 }
2612
2613 if(thisDist>=0 && thisDist<=radius){
2614 distances[0]++; // increment count
2615 break; // only need one track point per query point
2616 }
2617 }
2618 // How many points were below threshold ?
2619 thisDist=distances[0];
2620
2621 // Let's see the distances then...
2622 if(verbosity>3) {
2623 cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl;
2624 }
2625
2626 // All the track stuff goes here
2627 n=trackNN;
2628 while(n--){
2629 if(thisDist>trackDistances[n]){
2630 if((n==0 || thisDist<=trackDistances[n-1])){
2631 // Copy all values above up the queue
2632 for( l=trackNN-1 ; l > n ; l--){
2633 trackDistances[l]=trackDistances[l-1];
2634 trackQIndexes[l]=trackQIndexes[l-1];
2635 trackSIndexes[l]=trackSIndexes[l-1];
2636 trackIDs[l]=trackIDs[l-1];
2637 }
2638 trackDistances[n]=thisDist;
2639 trackQIndexes[n]=qIndexes[0];
2640 trackSIndexes[n]=sIndexes[0];
2641 successfulTracks++;
2642 trackIDs[n]=track;
2643 break;
2644 }
2645 }
2646 else
2647 break;
2648 }
2649 } // Duration match
2650
2651 // Clean up current track
2652 if(D!=NULL){
2653 for(j=0; j<numVectors; j++)
2654 delete[] D[j];
2655 }
2656
2657 if(DD!=NULL){
2658 for(j=0; j<numVectors; j++)
2659 delete[] DD[j];
2660 }
2661 }
2662 // per-track reset array values
2663 for(unsigned k=0; k<pointNN; k++){
2664 distances[k]=0.0;
2665 qIndexes[k]=~0;
2666 sIndexes[k]=~0;
2667 }
2668 }
2669
2670 free(data_buffer);
2671
2672 gettimeofday(&tv2,NULL);
2673 if(verbosity>1) {
2674 cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
2675 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
2676 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
2677 << " minSample: " << minSample << " maxSample: " << maxSample << endl;
2678 }
2679
2680 if(adbQueryResponse==0){
2681 if(verbosity>1) {
2682 cerr<<endl;
2683 }
2684 // Output answer
2685 // Loop over nearest neighbours
2686 for(k=0; k < min(trackNN,successfulTracks); k++)
2687 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << endl;
2688 }
2689 else{ // Process Web Services Query
2690 int listLen = min(trackNN, processedTracks);
2691 adbQueryResponse->result.__sizeRlist=listLen;
2692 adbQueryResponse->result.__sizeDist=listLen;
2693 adbQueryResponse->result.__sizeQpos=listLen;
2694 adbQueryResponse->result.__sizeSpos=listLen;
2695 adbQueryResponse->result.Rlist= new char*[listLen];
2696 adbQueryResponse->result.Dist = new double[listLen];
2697 adbQueryResponse->result.Qpos = new unsigned int[listLen];
2698 adbQueryResponse->result.Spos = new unsigned int[listLen];
2699 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
2700 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
2701 adbQueryResponse->result.Dist[k]=trackDistances[k];
2702 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
2703 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
2704 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
2705 }
2706 }
2707
2708 // Clean up
2709 if(trackOffsetTable)
2710 delete[] trackOffsetTable;
2711 if(queryCopy)
2712 delete[] queryCopy;
2713 if(qNorm)
2714 delete[] qNorm;
2715 if(sNorm)
2716 delete[] sNorm;
2717 if(qPower)
2718 delete[] qPower;
2719 if(sPower)
2720 delete[] sPower;
2721 if(D)
2722 delete[] D;
2723 if(DD)
2724 delete[] DD;
2725 if(timesdata)
2726 delete[] timesdata;
2727 if(querydurs)
2728 delete[] querydurs;
2729 if(meanDBdur)
2730 delete[] meanDBdur;
2731 }
2732
2733 // Unit norm block of features 417 // Unit norm block of features
2734 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){ 418
2735 unsigned d; 419 /* FIXME: in fact this does not unit norm a block of features, it just
2736 double L2, *p; 420 records the L2 norms somewhere. unitNorm() does in fact unit norm
2737 if(verbosity>2) { 421 a block of features. */
2738 cerr << "norming " << n << " vectors...";cerr.flush();
2739 }
2740 while(n--){
2741 p=X;
2742 L2=0.0;
2743 d=dim;
2744 while(d--){
2745 L2+=*p**p;
2746 p++;
2747 }
2748 /* L2=sqrt(L2);*/
2749 if(qNorm)
2750 *qNorm++=L2;
2751 /*
2752 oneOverL2 = 1.0/L2;
2753 d=dim;
2754 while(d--){
2755 *X*=oneOverL2;
2756 X++;
2757 */
2758 X+=dim;
2759 }
2760 if(verbosity>2) {
2761 cerr << "done..." << endl;
2762 }
2763 }
2764
2765 // Unit norm block of features
2766 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){ 422 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){
2767 unsigned d; 423 unsigned d;
2768 double *p; 424 double *p;
2769 unsigned nn = n; 425 unsigned nn = n;
2770 426
2772 428
2773 if( !append && (dbH->flags & O2_FLAG_L2NORM) ) 429 if( !append && (dbH->flags & O2_FLAG_L2NORM) )
2774 error("Database is already L2 normed", "automatic norm on insert is enabled"); 430 error("Database is already L2 normed", "automatic norm on insert is enabled");
2775 431
2776 if(verbosity>2) { 432 if(verbosity>2) {
2777 cerr << "norming " << n << " vectors...";cerr.flush(); 433 std::cerr << "norming " << n << " vectors...";std::cerr.flush();
2778 } 434 }
2779 435
2780 double* l2buf = new double[n]; 436 double* l2buf = new double[n];
2781 double* l2ptr = l2buf; 437 double* l2ptr = l2buf;
2782 assert(l2buf); 438 assert(l2buf);
2805 } 461 }
2806 memcpy(l2normTable+offset, l2buf, n*sizeof(double)); 462 memcpy(l2normTable+offset, l2buf, n*sizeof(double));
2807 if(l2buf) 463 if(l2buf)
2808 delete[] l2buf; 464 delete[] l2buf;
2809 if(verbosity>2) { 465 if(verbosity>2) {
2810 cerr << "done..." << endl; 466 std::cerr << "done..." << std::endl;
2811 }
2812 }
2813
2814
2815 // Start an audioDB server on the host
2816 void audioDB::startServer(){
2817 struct soap soap;
2818 int m, s; // master and slave sockets
2819 soap_init(&soap);
2820 // FIXME: largely this use of SO_REUSEADDR is to make writing (and
2821 // running) test cases more convenient, so that multiple test runs
2822 // in close succession don't fail because of a bin() error.
2823 // Investigate whether there are any potential drawbacks in this,
2824 // and also whether there's a better way to write the tests. --
2825 // CSR, 2007-10-03
2826 soap.bind_flags |= SO_REUSEADDR;
2827 m = soap_bind(&soap, NULL, port, 100);
2828 if (m < 0)
2829 soap_print_fault(&soap, stderr);
2830 else
2831 {
2832 fprintf(stderr, "Socket connection successful: master socket = %d\n", m);
2833 for (int i = 1; ; i++)
2834 {
2835 s = soap_accept(&soap);
2836 if (s < 0)
2837 {
2838 soap_print_fault(&soap, stderr);
2839 break;
2840 }
2841 fprintf(stderr, "%d: accepted connection from IP=%lu.%lu.%lu.%lu socket=%d\n", i,
2842 (soap.ip >> 24)&0xFF, (soap.ip >> 16)&0xFF, (soap.ip >> 8)&0xFF, soap.ip&0xFF, s);
2843 if (soap_serve(&soap) != SOAP_OK) // process RPC request
2844 soap_print_fault(&soap, stderr); // print error
2845 fprintf(stderr, "request served\n");
2846 soap_destroy(&soap); // clean up class instances
2847 soap_end(&soap); // clean up everything and close socket
2848 }
2849 }
2850 soap_done(&soap); // close master socket and detach environment
2851 }
2852
2853
2854 // web services
2855
2856 // SERVER SIDE
2857 int adb__status(struct soap* soap, xsd__string dbName, adb__statusResponse &adbStatusResponse){
2858 char* const argv[]={"audioDB",COM_STATUS,"-d",dbName};
2859 const unsigned argc = 4;
2860 try {
2861 audioDB(argc, argv, &adbStatusResponse);
2862 return SOAP_OK;
2863 } catch(char *err) {
2864 soap_receiver_fault(soap, err, "");
2865 return SOAP_FAULT;
2866 }
2867 }
2868
2869 // Literal translation of command line to web service
2870
2871 int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int trackNN, xsd__int seqLen, adb__queryResponse &adbQueryResponse){
2872 char queryType[256];
2873 for(int k=0; k<256; k++)
2874 queryType[k]='\0';
2875 if(qType == O2_POINT_QUERY)
2876 strncpy(queryType, "point", strlen("point"));
2877 else if (qType == O2_SEQUENCE_QUERY)
2878 strncpy(queryType, "sequence", strlen("sequence"));
2879 else if(qType == O2_TRACK_QUERY)
2880 strncpy(queryType,"track", strlen("track"));
2881 else
2882 strncpy(queryType, "", strlen(""));
2883
2884 if(pointNN==0)
2885 pointNN=10;
2886 if(trackNN==0)
2887 trackNN=10;
2888 if(seqLen==0)
2889 seqLen=16;
2890
2891 char qPosStr[256];
2892 sprintf(qPosStr, "%d", qPos);
2893 char pointNNStr[256];
2894 sprintf(pointNNStr,"%d",pointNN);
2895 char trackNNStr[256];
2896 sprintf(trackNNStr,"%d",trackNN);
2897 char seqLenStr[256];
2898 sprintf(seqLenStr,"%d",seqLen);
2899
2900 const char* argv[] ={
2901 "./audioDB",
2902 COM_QUERY,
2903 queryType, // Need to pass a parameter
2904 COM_DATABASE,
2905 ENSURE_STRING(dbName),
2906 COM_FEATURES,
2907 ENSURE_STRING(qKey),
2908 COM_KEYLIST,
2909 ENSURE_STRING(keyList),
2910 COM_TIMES,
2911 ENSURE_STRING(timesFileName),
2912 COM_QPOINT,
2913 qPosStr,
2914 COM_POINTNN,
2915 pointNNStr,
2916 COM_TRACKNN,
2917 trackNNStr, // Need to pass a parameter
2918 COM_SEQLEN,
2919 seqLenStr
2920 };
2921
2922 const unsigned argc = 19;
2923 try {
2924 audioDB(argc, (char* const*)argv, &adbQueryResponse);
2925 return SOAP_OK;
2926 } catch (char *err) {
2927 soap_receiver_fault(soap, err, "");
2928 return SOAP_FAULT;
2929 }
2930 }
2931
2932 int adb__sequenceQuery(struct soap* soap, xsd__string dbName, xsd__string qKey,
2933 adb__sequenceQueryParms *parms,
2934 adb__queryResponse &adbQueryResponse) {
2935
2936 char qPosStr[256];
2937 char pointNNStr[256];
2938 char trackNNStr[256];
2939 char seqLenStr[256];
2940 char relative_thresholdStr[256];
2941 char absolute_thresholdStr[256];
2942
2943 /* When the branch is merged, move this to a header and use it
2944 elsewhere */
2945 #define INTSTRINGIFY(val, str) \
2946 snprintf(str, 256, "%d", val);
2947 #define DOUBLESTRINGIFY(val, str) \
2948 snprintf(str, 256, "%f", val);
2949
2950 INTSTRINGIFY(parms->qPos, qPosStr);
2951 INTSTRINGIFY(parms->pointNN, pointNNStr);
2952 INTSTRINGIFY(parms->segNN, trackNNStr);
2953 /* FIXME: decide which of segLen and seqLen should live */
2954 INTSTRINGIFY(parms->segLen, seqLenStr);
2955
2956 DOUBLESTRINGIFY(parms->relative_threshold, relative_thresholdStr);
2957 DOUBLESTRINGIFY(parms->absolute_threshold, absolute_thresholdStr);
2958
2959 const char *argv[] = {
2960 "./audioDB",
2961 COM_QUERY,
2962 "sequence",
2963 COM_DATABASE,
2964 dbName,
2965 COM_FEATURES,
2966 qKey,
2967 COM_KEYLIST,
2968 /* FIXME: when this branch is merged, use ENSURE_STRING */
2969 parms->keyList==0?"":parms->keyList,
2970 COM_TIMES,
2971 parms->timesFileName==0?"":parms->timesFileName,
2972 COM_QUERYPOWER,
2973 parms->powerFileName==0?"":parms->powerFileName,
2974 COM_QPOINT,
2975 qPosStr,
2976 COM_POINTNN,
2977 pointNNStr,
2978 COM_TRACKNN,
2979 trackNNStr,
2980 COM_SEQLEN,
2981 seqLenStr,
2982 COM_RELATIVE_THRESH,
2983 relative_thresholdStr,
2984 COM_ABSOLUTE_THRESH,
2985 absolute_thresholdStr
2986 };
2987
2988 const unsigned argc = 25;
2989
2990 try {
2991 audioDB(argc, (char* const*)argv, &adbQueryResponse);
2992 return SOAP_OK;
2993 } catch (char *err) {
2994 soap_receiver_fault(soap, err, "");
2995 return SOAP_FAULT;
2996 } 467 }
2997 } 468 }
2998 469
2999 int main(const unsigned argc, char* const argv[]){ 470 int main(const unsigned argc, char* const argv[]){
3000 audioDB(argc, argv); 471 audioDB(argc, argv);