Mercurial > hg > audiodb
comparison audioDB.cpp @ 204:2ea1908707c7 refactoring
Filewise refactor.
Break apart huge monolithic audioDB.cpp file into seven broadly
independent portions:
* SOAP
* DB creation
* insertion
* query
* dump
* common functionality
* constructor functions
Remove the "using namespace std" from the header file, though that
wasn't actually a problem: the problem in question is solved by
including adb.nsmap in only soap.cpp.
Makefile improvements.
author | mas01cr |
---|---|
date | Wed, 28 Nov 2007 15:10:28 +0000 |
parents | 7c9feaceeab5 |
children | 9c3396bab02e |
comparison
equal
deleted
inserted
replaced
203:4b05c5bbf06d | 204:2ea1908707c7 |
---|---|
1 #include "audioDB.h" | 1 #include "audioDB.h" |
2 | |
3 #if defined(O2_DEBUG) | |
4 void sigterm_action(int signal, siginfo_t *info, void *context) { | |
5 exit(128+signal); | |
6 } | |
7 | |
8 void sighup_action(int signal, siginfo_t *info, void *context) { | |
9 // FIXME: reread any configuration files | |
10 } | |
11 #endif | |
12 | |
13 void audioDB::error(const char* a, const char* b, const char *sysFunc) { | |
14 if(isServer) { | |
15 /* FIXME: I think this is leaky -- we never delete err. actually | |
16 deleting it is tricky, though; it gets placed into some | |
17 soap-internal struct with uncertain extent... -- CSR, | |
18 2007-10-01 */ | |
19 char *err = new char[256]; /* FIXME: overflows */ | |
20 snprintf(err, 255, "%s: %s\n%s", a, b, sysFunc ? strerror(errno) : ""); | |
21 /* FIXME: actually we could usefully do with a properly structured | |
22 type, so that we can throw separate faultstring and details. | |
23 -- CSR, 2007-10-01 */ | |
24 throw(err); | |
25 } else { | |
26 cerr << a << ": " << b << endl; | |
27 if (sysFunc) { | |
28 perror(sysFunc); | |
29 } | |
30 exit(1); | |
31 } | |
32 } | |
33 | 2 |
34 audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS | 3 audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS |
35 { | 4 { |
36 if(processArgs(argc, argv)<0){ | 5 if(processArgs(argc, argv)<0){ |
37 printf("No command found.\n"); | 6 printf("No command found.\n"); |
159 } | 128 } |
160 | 129 |
161 if(args_info.verbosity_given){ | 130 if(args_info.verbosity_given){ |
162 verbosity=args_info.verbosity_arg; | 131 verbosity=args_info.verbosity_arg; |
163 if(verbosity<0 || verbosity>10){ | 132 if(verbosity<0 || verbosity>10){ |
164 cerr << "Warning: verbosity out of range, setting to 1" << endl; | 133 std::cerr << "Warning: verbosity out of range, setting to 1" << std::endl; |
165 verbosity=1; | 134 verbosity=1; |
166 } | 135 } |
167 } | 136 } |
168 | 137 |
169 if(args_info.size_given) { | 138 if(args_info.size_given) { |
178 if(radius<=0 || radius>1000000000){ | 147 if(radius<=0 || radius>1000000000){ |
179 error("radius out of range"); | 148 error("radius out of range"); |
180 } | 149 } |
181 else | 150 else |
182 if(verbosity>3) { | 151 if(verbosity>3) { |
183 cerr << "Setting radius to " << radius << endl; | 152 std::cerr << "Setting radius to " << radius << std::endl; |
184 } | 153 } |
185 } | 154 } |
186 | 155 |
187 if(args_info.SERVER_given){ | 156 if(args_info.SERVER_given){ |
188 command=COM_SERVER; | 157 command=COM_SERVER; |
247 if(args_info.key_given) | 216 if(args_info.key_given) |
248 key=args_info.key_arg; | 217 key=args_info.key_arg; |
249 if(args_info.times_given){ | 218 if(args_info.times_given){ |
250 timesFileName=args_info.times_arg; | 219 timesFileName=args_info.times_arg; |
251 if(strlen(timesFileName)>0){ | 220 if(strlen(timesFileName)>0){ |
252 if(!(timesFile = new ifstream(timesFileName,ios::in))) | 221 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) |
253 error("Could not open times file for reading", timesFileName); | 222 error("Could not open times file for reading", timesFileName); |
254 usingTimes=1; | 223 usingTimes=1; |
255 } | 224 } |
256 } | 225 } |
257 if (args_info.power_given) { | 226 if (args_info.power_given) { |
274 key=args_info.keyList_arg; // INCONSISTENT NO CHECK | 243 key=args_info.keyList_arg; // INCONSISTENT NO CHECK |
275 | 244 |
276 /* TO DO: REPLACE WITH | 245 /* TO DO: REPLACE WITH |
277 if(args_info.keyList_given){ | 246 if(args_info.keyList_given){ |
278 trackFileName=args_info.keyList_arg; | 247 trackFileName=args_info.keyList_arg; |
279 if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) | 248 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in))) |
280 error("Could not open keyList file for reading",trackFileName); | 249 error("Could not open keyList file for reading",trackFileName); |
281 } | 250 } |
282 AND UPDATE BATCHINSERT() | 251 AND UPDATE BATCHINSERT() |
283 */ | 252 */ |
284 | 253 |
285 if(args_info.timesList_given){ | 254 if(args_info.timesList_given){ |
286 timesFileName=args_info.timesList_arg; | 255 timesFileName=args_info.timesList_arg; |
287 if(strlen(timesFileName)>0){ | 256 if(strlen(timesFileName)>0){ |
288 if(!(timesFile = new ifstream(timesFileName,ios::in))) | 257 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) |
289 error("Could not open timesList file for reading", timesFileName); | 258 error("Could not open timesList file for reading", timesFileName); |
290 usingTimes=1; | 259 usingTimes=1; |
291 } | 260 } |
292 } | 261 } |
293 if(args_info.powerList_given){ | 262 if(args_info.powerList_given){ |
294 powerFileName=args_info.powerList_arg; | 263 powerFileName=args_info.powerList_arg; |
295 if(strlen(powerFileName)>0){ | 264 if(strlen(powerFileName)>0){ |
296 if(!(powerFile = new ifstream(powerFileName,ios::in))) | 265 if(!(powerFile = new std::ifstream(powerFileName,std::ios::in))) |
297 error("Could not open powerList file for reading", powerFileName); | 266 error("Could not open powerList file for reading", powerFileName); |
298 usingPower=1; | 267 usingPower=1; |
299 } | 268 } |
300 } | 269 } |
301 return 0; | 270 return 0; |
307 dbName=args_info.database_arg; | 276 dbName=args_info.database_arg; |
308 inFile=args_info.features_arg; | 277 inFile=args_info.features_arg; |
309 | 278 |
310 if(args_info.keyList_given){ | 279 if(args_info.keyList_given){ |
311 trackFileName=args_info.keyList_arg; | 280 trackFileName=args_info.keyList_arg; |
312 if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) | 281 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in))) |
313 error("Could not open keyList file for reading",trackFileName); | 282 error("Could not open keyList file for reading",trackFileName); |
314 } | 283 } |
315 | 284 |
316 if(args_info.times_given){ | 285 if(args_info.times_given){ |
317 timesFileName=args_info.times_arg; | 286 timesFileName=args_info.times_arg; |
318 if(strlen(timesFileName)>0){ | 287 if(strlen(timesFileName)>0){ |
319 if(!(timesFile = new ifstream(timesFileName,ios::in))) | 288 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) |
320 error("Could not open times file for reading", timesFileName); | 289 error("Could not open times file for reading", timesFileName); |
321 usingTimes=1; | 290 usingTimes=1; |
322 } | 291 } |
323 } | 292 } |
324 | 293 |
380 return 0; | 349 return 0; |
381 } | 350 } |
382 return -1; // no command found | 351 return -1; // no command found |
383 } | 352 } |
384 | 353 |
385 void audioDB::get_lock(int fd, bool exclusive) { | |
386 struct flock lock; | |
387 int status; | |
388 | |
389 lock.l_type = exclusive ? F_WRLCK : F_RDLCK; | |
390 lock.l_whence = SEEK_SET; | |
391 lock.l_start = 0; | |
392 lock.l_len = 0; /* "the whole file" */ | |
393 | |
394 retry: | |
395 do { | |
396 status = fcntl(fd, F_SETLKW, &lock); | |
397 } while (status != 0 && errno == EINTR); | |
398 | |
399 if (status) { | |
400 if (errno == EAGAIN) { | |
401 sleep(1); | |
402 goto retry; | |
403 } else { | |
404 error("fcntl lock error", "", "fcntl"); | |
405 } | |
406 } | |
407 } | |
408 | |
409 void audioDB::release_lock(int fd) { | |
410 struct flock lock; | |
411 int status; | |
412 | |
413 lock.l_type = F_UNLCK; | |
414 lock.l_whence = SEEK_SET; | |
415 lock.l_start = 0; | |
416 lock.l_len = 0; | |
417 | |
418 status = fcntl(fd, F_SETLKW, &lock); | |
419 | |
420 if (status) | |
421 error("fcntl unlock error", "", "fcntl"); | |
422 } | |
423 | |
424 /* Make a new database. | |
425 | |
426 The database consists of: | |
427 | |
428 * a header (see dbTableHeader struct definition); | |
429 * keyTable: list of keys of tracks; | |
430 * trackTable: Maps implicit feature index to a feature vector | |
431 matrix (sizes of tracks) | |
432 * featureTable: Lots of doubles; | |
433 * timesTable: (start,end) time points for each feature vector; | |
434 * powerTable: associated power for each feature vector; | |
435 * l2normTable: squared l2norms for each feature vector. | |
436 */ | |
437 void audioDB::create(const char* dbName){ | |
438 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) | |
439 error("Can't create database file", dbName, "open"); | |
440 get_lock(dbfid, 1); | |
441 | |
442 if(verbosity) { | |
443 cerr << "header size:" << O2_HEADERSIZE << endl; | |
444 } | |
445 | |
446 dbH = new dbTableHeaderT(); | |
447 assert(dbH); | |
448 | |
449 unsigned int maxfiles = (unsigned int) rint((double) O2_MAXFILES * (double) size / (double) O2_DEFAULTDBSIZE); | |
450 | |
451 // Initialize header | |
452 dbH->magic = O2_MAGIC; | |
453 dbH->version = O2_FORMAT_VERSION; | |
454 dbH->numFiles = 0; | |
455 dbH->dim = 0; | |
456 dbH->flags = 0; | |
457 dbH->length = 0; | |
458 dbH->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE); | |
459 dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles); | |
460 dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles); | |
461 dbH->l2normTableOffset = ALIGN_PAGE_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double)); | |
462 dbH->powerTableOffset = ALIGN_PAGE_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double)); | |
463 dbH->timesTableOffset = ALIGN_PAGE_DOWN(dbH->powerTableOffset - 2*maxfiles*O2_MEANNUMVECTORS*sizeof(double)); | |
464 dbH->dbSize = size; | |
465 | |
466 write(dbfid, dbH, O2_HEADERSIZE); | |
467 | |
468 // go to the location corresponding to the last byte | |
469 if (lseek (dbfid, size - 1, SEEK_SET) == -1) | |
470 error("lseek error in db file", "", "lseek"); | |
471 | |
472 // write a dummy byte at the last location | |
473 if (write (dbfid, "", 1) != 1) | |
474 error("write error", "", "write"); | |
475 | |
476 if(verbosity) { | |
477 cerr << COM_CREATE << " " << dbName << endl; | |
478 } | |
479 } | |
480 | |
481 void audioDB::drop(){ | |
482 // FIXME: drop something? Should we even allow this? | |
483 } | |
484 | |
485 void audioDB::initDBHeader(const char* dbName) { | |
486 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) { | |
487 error("Can't open database file", dbName, "open"); | |
488 } | |
489 | |
490 get_lock(dbfid, forWrite); | |
491 // Get the database header info | |
492 dbH = new dbTableHeaderT(); | |
493 assert(dbH); | |
494 | |
495 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) { | |
496 error("error reading db header", dbName, "read"); | |
497 } | |
498 | |
499 if(dbH->magic == O2_OLD_MAGIC) { | |
500 // FIXME: if anyone ever complains, write the program to convert | |
501 // from the old audioDB format to the new... | |
502 error("database file has old O2 header", dbName); | |
503 } | |
504 | |
505 if(dbH->magic != O2_MAGIC) { | |
506 cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << endl; | |
507 error("database file has incorrect header", dbName); | |
508 } | |
509 | |
510 if(dbH->version != O2_FORMAT_VERSION) { | |
511 error("database file has incorect version", dbName); | |
512 } | |
513 | |
514 #define CHECKED_MMAP(type, var, start, length) \ | |
515 { void *tmp = mmap(0, length, (PROT_READ | (forWrite ? PROT_WRITE : 0)), MAP_SHARED, dbfid, (start)); \ | |
516 if(tmp == (void *) -1) { \ | |
517 error("mmap error for db table", #var, "mmap"); \ | |
518 } \ | |
519 var = (type) tmp; \ | |
520 } | |
521 | |
522 CHECKED_MMAP(char *, db, 0, getpagesize()); | |
523 | |
524 // Make some handy tables with correct types | |
525 if(forWrite || (dbH->length > 0)) { | |
526 if(forWrite) { | |
527 fileTableLength = dbH->trackTableOffset - dbH->fileTableOffset; | |
528 trackTableLength = dbH->dataOffset - dbH->trackTableOffset; | |
529 dataBufLength = dbH->timesTableOffset - dbH->dataOffset; | |
530 timesTableLength = dbH->powerTableOffset - dbH->timesTableOffset; | |
531 powerTableLength = dbH->l2normTableOffset - dbH->powerTableOffset; | |
532 l2normTableLength = dbH->dbSize - dbH->l2normTableOffset; | |
533 } else { | |
534 fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLESIZE); | |
535 trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLESIZE); | |
536 dataBufLength = ALIGN_PAGE_UP(dbH->length); | |
537 timesTableLength = ALIGN_PAGE_UP(2*(dbH->length / dbH->dim)); | |
538 powerTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim); | |
539 l2normTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim); | |
540 } | |
541 CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, fileTableLength); | |
542 CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, trackTableLength); | |
543 /* | |
544 * No more mmap() for dataBuf | |
545 * | |
546 * FIXME: Actually we do do the mmap() in the two cases where it's | |
547 * still "needed": in pointQuery and in l2norm if dbH->length is | |
548 * non-zero. Removing those cases too (and deleting the dataBuf | |
549 * variable completely) would be cool. -- CSR, 2007-11-19 | |
550 * | |
551 * CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); | |
552 */ | |
553 CHECKED_MMAP(double *, timesTable, dbH->timesTableOffset, timesTableLength); | |
554 CHECKED_MMAP(double *, powerTable, dbH->powerTableOffset, powerTableLength); | |
555 CHECKED_MMAP(double *, l2normTable, dbH->l2normTableOffset, l2normTableLength); | |
556 } | |
557 } | |
558 | |
559 void audioDB::initInputFile (const char *inFile) { | |
560 if (inFile) { | |
561 if ((infid = open(inFile, O_RDONLY)) < 0) { | |
562 error("can't open input file for reading", inFile, "open"); | |
563 } | |
564 | |
565 if (fstat(infid, &statbuf) < 0) { | |
566 error("fstat error finding size of input", inFile, "fstat"); | |
567 } | |
568 | |
569 if(dbH->dim == 0 && dbH->length == 0) { // empty database | |
570 // initialize with input dimensionality | |
571 if(read(infid, &dbH->dim, sizeof(unsigned)) != sizeof(unsigned)) { | |
572 error("short read of input file", inFile); | |
573 } | |
574 if(dbH->dim == 0) { | |
575 error("dimensionality of zero in input file", inFile); | |
576 } | |
577 } else { | |
578 unsigned test; | |
579 if(read(infid, &test, sizeof(unsigned)) != sizeof(unsigned)) { | |
580 error("short read of input file", inFile); | |
581 } | |
582 if(dbH->dim == 0) { | |
583 error("dimensionality of zero in input file", inFile); | |
584 } | |
585 if(dbH->dim != test) { | |
586 cerr << "error: expected dimension: " << dbH->dim << ", got : " << test <<endl; | |
587 error("feature dimensions do not match database table dimensions", inFile); | |
588 } | |
589 } | |
590 | |
591 if ((indata = (char *) mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) == (caddr_t) -1) { | |
592 error("mmap error for input", inFile, "mmap"); | |
593 } | |
594 } | |
595 } | |
596 | |
597 void audioDB::initTables(const char* dbName, const char* inFile = 0) { | |
598 initDBHeader(dbName); | |
599 initInputFile(inFile); | |
600 } | |
601 | |
602 bool audioDB::enough_data_space_free(off_t size) { | |
603 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size); | |
604 } | |
605 | |
606 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) { | |
607 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET); | |
608 write(dbfid, buffer, size); | |
609 } | |
610 | |
611 void audioDB::insert(const char* dbName, const char* inFile) { | |
612 forWrite = true; | |
613 initTables(dbName, inFile); | |
614 | |
615 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
616 error("Must use timestamps with timestamped database","use --times"); | |
617 | |
618 if(!usingPower && (dbH->flags & O2_FLAG_POWER)) | |
619 error("Must use power with power-enabled database", dbName); | |
620 | |
621 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) { | |
622 error("Insert failed: no more room in database", inFile); | |
623 } | |
624 | |
625 if(!key) | |
626 key=inFile; | |
627 // Linear scan of filenames check for pre-existing feature | |
628 unsigned alreadyInserted=0; | |
629 for(unsigned k=0; k<dbH->numFiles; k++) | |
630 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key)+1)==0){ | |
631 alreadyInserted=1; | |
632 break; | |
633 } | |
634 | |
635 if(alreadyInserted){ | |
636 if(verbosity) { | |
637 cerr << "Warning: key already exists in database, ignoring: " <<inFile << endl; | |
638 } | |
639 return; | |
640 } | |
641 | |
642 // Make a track index table of features to file indexes | |
643 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
644 if(!numVectors){ | |
645 if(verbosity) { | |
646 cerr << "Warning: ignoring zero-length feature vector file:" << key << endl; | |
647 } | |
648 // CLEAN UP | |
649 munmap(indata,statbuf.st_size); | |
650 munmap(db,dbH->dbSize); | |
651 close(infid); | |
652 return; | |
653 } | |
654 | |
655 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key)); | |
656 | |
657 off_t insertoffset = dbH->length;// Store current state | |
658 | |
659 // Check times status and insert times from file | |
660 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double)); | |
661 double *timesdata = timesTable + 2*indexoffset; | |
662 | |
663 if(2*(indexoffset + numVectors) > timesTableLength) { | |
664 error("out of space for times", key); | |
665 } | |
666 | |
667 if (usingTimes) { | |
668 insertTimeStamps(numVectors, timesFile, timesdata); | |
669 } | |
670 | |
671 double *powerdata = powerTable + indexoffset; | |
672 insertPowerData(numVectors, powerfd, powerdata); | |
673 | |
674 // Increment file count | |
675 dbH->numFiles++; | |
676 | |
677 // Update Header information | |
678 dbH->length+=(statbuf.st_size-sizeof(int)); | |
679 | |
680 // Update track to file index map | |
681 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned)); | |
682 | |
683 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int)); | |
684 | |
685 // Norm the vectors on input if the database is already L2 normed | |
686 if(dbH->flags & O2_FLAG_L2NORM) | |
687 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append | |
688 | |
689 // Report status | |
690 status(dbName); | |
691 if(verbosity) { | |
692 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors " | |
693 << (statbuf.st_size-sizeof(int)) << " bytes." << endl; | |
694 } | |
695 | |
696 // Copy the header back to the database | |
697 memcpy (db, dbH, sizeof(dbTableHeaderT)); | |
698 | |
699 // CLEAN UP | |
700 munmap(indata,statbuf.st_size); | |
701 close(infid); | |
702 } | |
703 | |
704 void audioDB::insertTimeStamps(unsigned numVectors, ifstream *timesFile, double *timesdata) { | |
705 assert(usingTimes); | |
706 | |
707 unsigned numtimes = 0; | |
708 | |
709 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) { | |
710 dbH->flags=dbH->flags|O2_FLAG_TIMES; | |
711 } else if(!(dbH->flags & O2_FLAG_TIMES)) { | |
712 error("Timestamp file used with non-timestamped database", timesFileName); | |
713 } | |
714 | |
715 if(!timesFile->is_open()) { | |
716 error("problem opening times file on timestamped database", timesFileName); | |
717 } | |
718 | |
719 double timepoint, next; | |
720 *timesFile >> timepoint; | |
721 if (timesFile->eof()) { | |
722 error("no entries in times file", timesFileName); | |
723 } | |
724 numtimes++; | |
725 do { | |
726 *timesFile >> next; | |
727 if (timesFile->eof()) { | |
728 break; | |
729 } | |
730 numtimes++; | |
731 timesdata[0] = timepoint; | |
732 timepoint = (timesdata[1] = next); | |
733 timesdata += 2; | |
734 } while (numtimes < numVectors + 1); | |
735 | |
736 if (numtimes < numVectors + 1) { | |
737 error("too few timepoints in times file", timesFileName); | |
738 } | |
739 | |
740 *timesFile >> next; | |
741 if (!timesFile->eof()) { | |
742 error("too many timepoints in times file", timesFileName); | |
743 } | |
744 } | |
745 | |
746 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) { | |
747 if (usingPower) { | |
748 if (!(dbH->flags & O2_FLAG_POWER)) { | |
749 error("Cannot insert power data on non-power DB", dbName); | |
750 } | |
751 | |
752 int one; | |
753 unsigned int count; | |
754 | |
755 count = read(powerfd, &one, sizeof(unsigned int)); | |
756 if (count != sizeof(unsigned int)) { | |
757 error("powerfd read failed", "int", "read"); | |
758 } | |
759 if (one != 1) { | |
760 error("dimensionality of power file not 1", powerFileName); | |
761 } | |
762 | |
763 // FIXME: should check that the powerfile is the right size for | |
764 // this. -- CSR, 2007-10-30 | |
765 count = read(powerfd, powerdata, numVectors * sizeof(double)); | |
766 if (count != numVectors * sizeof(double)) { | |
767 error("powerfd read failed", "double", "read"); | |
768 } | |
769 } | |
770 } | |
771 | |
772 void audioDB::batchinsert(const char* dbName, const char* inFile) { | |
773 | |
774 forWrite = true; | |
775 initDBHeader(dbName); | |
776 | |
777 if(!key) | |
778 key=inFile; | |
779 ifstream *filesIn = 0; | |
780 ifstream *keysIn = 0; | |
781 ifstream* thisTimesFile = 0; | |
782 int thispowerfd = 0; | |
783 | |
784 if(!(filesIn = new ifstream(inFile))) | |
785 error("Could not open batch in file", inFile); | |
786 if(key && key!=inFile) | |
787 if(!(keysIn = new ifstream(key))) | |
788 error("Could not open batch key file",key); | |
789 | |
790 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
791 error("Must use timestamps with timestamped database","use --times"); | |
792 | |
793 if(!usingPower && (dbH->flags & O2_FLAG_POWER)) | |
794 error("Must use power with power-enabled database", dbName); | |
795 | |
796 unsigned totalVectors=0; | |
797 char *thisKey = new char[MAXSTR]; | |
798 char *thisFile = new char[MAXSTR]; | |
799 char *thisTimesFileName = new char[MAXSTR]; | |
800 char *thisPowerFileName = new char[MAXSTR]; | |
801 | |
802 do{ | |
803 filesIn->getline(thisFile,MAXSTR); | |
804 if(key && key!=inFile) | |
805 keysIn->getline(thisKey,MAXSTR); | |
806 else | |
807 thisKey = thisFile; | |
808 if(usingTimes) | |
809 timesFile->getline(thisTimesFileName,MAXSTR); | |
810 if(usingPower) | |
811 powerFile->getline(thisPowerFileName, MAXSTR); | |
812 | |
813 if(filesIn->eof()) | |
814 break; | |
815 | |
816 initInputFile(thisFile); | |
817 | |
818 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) { | |
819 error("batchinsert failed: no more room in database", thisFile); | |
820 } | |
821 | |
822 // Linear scan of filenames check for pre-existing feature | |
823 unsigned alreadyInserted=0; | |
824 | |
825 for(unsigned k=0; k<dbH->numFiles; k++) | |
826 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey)+1)==0){ | |
827 alreadyInserted=1; | |
828 break; | |
829 } | |
830 | |
831 if(alreadyInserted){ | |
832 if(verbosity) { | |
833 cerr << "Warning: key already exists in database:" << thisKey << endl; | |
834 } | |
835 } | |
836 else{ | |
837 | |
838 // Make a track index table of features to file indexes | |
839 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
840 if(!numVectors){ | |
841 if(verbosity) { | |
842 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; | |
843 } | |
844 } | |
845 else{ | |
846 if(usingTimes){ | |
847 if(timesFile->eof()) { | |
848 error("not enough timestamp files in timesList", timesFileName); | |
849 } | |
850 thisTimesFile = new ifstream(thisTimesFileName,ios::in); | |
851 if(!thisTimesFile->is_open()) { | |
852 error("Cannot open timestamp file", thisTimesFileName); | |
853 } | |
854 off_t insertoffset = dbH->length; | |
855 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double)); | |
856 double *timesdata = timesTable + 2*indexoffset; | |
857 if(2*(indexoffset + numVectors) > timesTableLength) { | |
858 error("out of space for times", key); | |
859 } | |
860 insertTimeStamps(numVectors, thisTimesFile, timesdata); | |
861 if(thisTimesFile) | |
862 delete thisTimesFile; | |
863 } | |
864 | |
865 if (usingPower) { | |
866 if(powerFile->eof()) { | |
867 error("not enough power files in powerList", powerFileName); | |
868 } | |
869 thispowerfd = open(thisPowerFileName, O_RDONLY); | |
870 if (thispowerfd < 0) { | |
871 error("failed to open power file", thisPowerFileName); | |
872 } | |
873 unsigned insertoffset = dbH->length; | |
874 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double)); | |
875 double *powerdata = powerTable + poweroffset; | |
876 insertPowerData(numVectors, thispowerfd, powerdata); | |
877 if (0 < thispowerfd) { | |
878 close(thispowerfd); | |
879 } | |
880 } | |
881 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); | |
882 | |
883 off_t insertoffset = dbH->length;// Store current state | |
884 | |
885 // Increment file count | |
886 dbH->numFiles++; | |
887 | |
888 // Update Header information | |
889 dbH->length+=(statbuf.st_size-sizeof(int)); | |
890 | |
891 // Update track to file index map | |
892 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); | |
893 | |
894 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int)); | |
895 | |
896 // Norm the vectors on input if the database is already L2 normed | |
897 if(dbH->flags & O2_FLAG_L2NORM) | |
898 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append | |
899 | |
900 totalVectors+=numVectors; | |
901 | |
902 // Copy the header back to the database | |
903 memcpy (db, dbH, sizeof(dbTableHeaderT)); | |
904 } | |
905 } | |
906 // CLEAN UP | |
907 munmap(indata,statbuf.st_size); | |
908 close(infid); | |
909 }while(!filesIn->eof()); | |
910 | |
911 if(verbosity) { | |
912 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " | |
913 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl; | |
914 } | |
915 | |
916 // Report status | |
917 status(dbName); | |
918 } | |
919 | |
920 // FIXME: this can't propagate the sequence length argument (used for | |
921 // dudCount). See adb__status() definition for the other half of | |
922 // this. -- CSR, 2007-10-01 | |
923 void audioDB::ws_status(const char*dbName, char* hostport){ | |
924 struct soap soap; | |
925 adb__statusResponse adbStatusResponse; | |
926 | |
927 // Query an existing adb database | |
928 soap_init(&soap); | |
929 if(soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResponse)==SOAP_OK) { | |
930 cout << "numFiles = " << adbStatusResponse.result.numFiles << endl; | |
931 cout << "dim = " << adbStatusResponse.result.dim << endl; | |
932 cout << "length = " << adbStatusResponse.result.length << endl; | |
933 cout << "dudCount = " << adbStatusResponse.result.dudCount << endl; | |
934 cout << "nullCount = " << adbStatusResponse.result.nullCount << endl; | |
935 cout << "flags = " << adbStatusResponse.result.flags << endl; | |
936 } else { | |
937 soap_print_fault(&soap,stderr); | |
938 } | |
939 | |
940 soap_destroy(&soap); | |
941 soap_end(&soap); | |
942 soap_done(&soap); | |
943 } | |
944 | |
945 void audioDB::ws_query(const char*dbName, const char *trackKey, const char* hostport){ | |
946 struct soap soap; | |
947 adb__queryResponse adbQueryResponse; | |
948 | |
949 soap_init(&soap); | |
950 if(soap_call_adb__query(&soap,hostport,NULL, | |
951 (char*)dbName,(char*)trackKey,(char*)trackFileName,(char*)timesFileName, | |
952 queryType, queryPoint, pointNN, trackNN, sequenceLength, adbQueryResponse)==SOAP_OK){ | |
953 //std::cerr << "result list length:" << adbQueryResponse.result.__sizeRlist << std::endl; | |
954 for(int i=0; i<adbQueryResponse.result.__sizeRlist; i++) | |
955 std::cout << adbQueryResponse.result.Rlist[i] << " " << adbQueryResponse.result.Dist[i] | |
956 << " " << adbQueryResponse.result.Qpos[i] << " " << adbQueryResponse.result.Spos[i] << std::endl; | |
957 } | |
958 else | |
959 soap_print_fault(&soap,stderr); | |
960 | |
961 soap_destroy(&soap); | |
962 soap_end(&soap); | |
963 soap_done(&soap); | |
964 | |
965 } | |
966 | |
967 | |
968 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){ | 354 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){ |
969 if(!dbH) | 355 if(!dbH) |
970 initTables(dbName, 0); | 356 initTables(dbName, 0); |
971 | 357 |
972 unsigned dudCount=0; | 358 unsigned dudCount=0; |
980 } | 366 } |
981 | 367 |
982 if(adbStatusResponse == 0) { | 368 if(adbStatusResponse == 0) { |
983 | 369 |
984 // Update Header information | 370 // Update Header information |
985 cout << "num files:" << dbH->numFiles << endl; | 371 std::cout << "num files:" << dbH->numFiles << std::endl; |
986 cout << "data dim:" << dbH->dim <<endl; | 372 std::cout << "data dim:" << dbH->dim <<std::endl; |
987 if(dbH->dim>0){ | 373 if(dbH->dim>0){ |
988 cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl; | 374 std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<std::endl; |
989 cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << endl; | 375 std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl; |
990 } | 376 } |
991 cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; | 377 std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; |
992 cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << | 378 std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << |
993 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; | 379 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; |
994 cout << "flags:" << dbH->flags << endl; | 380 std::cout << "flags:" << dbH->flags << std::endl; |
995 | 381 |
996 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; | 382 std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl; |
997 } else { | 383 } else { |
998 adbStatusResponse->result.numFiles = dbH->numFiles; | 384 adbStatusResponse->result.numFiles = dbH->numFiles; |
999 adbStatusResponse->result.dim = dbH->dim; | 385 adbStatusResponse->result.dim = dbH->dim; |
1000 adbStatusResponse->result.length = dbH->length; | 386 adbStatusResponse->result.length = dbH->length; |
1001 adbStatusResponse->result.dudCount = dudCount; | 387 adbStatusResponse->result.dudCount = dudCount; |
1002 adbStatusResponse->result.nullCount = nullCount; | 388 adbStatusResponse->result.nullCount = nullCount; |
1003 adbStatusResponse->result.flags = dbH->flags; | 389 adbStatusResponse->result.flags = dbH->flags; |
1004 } | 390 } |
1005 } | |
1006 | |
1007 void audioDB::dump(const char* dbName){ | |
1008 if(!dbH) { | |
1009 initTables(dbName, 0); | |
1010 } | |
1011 | |
1012 if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) { | |
1013 error("error making output directory", output, "mkdir"); | |
1014 } | |
1015 | |
1016 char *cwd = new char[PATH_MAX]; | |
1017 | |
1018 if ((getcwd(cwd, PATH_MAX)) == 0) { | |
1019 error("error getting working directory", "", "getcwd"); | |
1020 } | |
1021 | |
1022 if((chdir(output)) < 0) { | |
1023 error("error changing working directory", output, "chdir"); | |
1024 } | |
1025 | |
1026 int fLfd, tLfd = 0, pLfd = 0, kLfd; | |
1027 FILE *fLFile, *tLFile = 0, *pLFile = 0, *kLFile; | |
1028 | |
1029 if ((fLfd = open("featureList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | |
1030 error("error creating featureList file", "featureList.txt", "open"); | |
1031 } | |
1032 | |
1033 int times = dbH->flags & O2_FLAG_TIMES; | |
1034 if (times) { | |
1035 if ((tLfd = open("timesList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | |
1036 error("error creating timesList file", "timesList.txt", "open"); | |
1037 } | |
1038 } | |
1039 | |
1040 int power = dbH->flags & O2_FLAG_POWER; | |
1041 if (power) { | |
1042 if ((pLfd = open("powerList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | |
1043 error("error creating powerList file", "powerList.txt", "open"); | |
1044 } | |
1045 } | |
1046 | |
1047 if ((kLfd = open("keyList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | |
1048 error("error creating keyList file", "keyList.txt", "open"); | |
1049 } | |
1050 | |
1051 /* can these fail? I sincerely hope not. */ | |
1052 fLFile = fdopen(fLfd, "w"); | |
1053 if (times) { | |
1054 tLFile = fdopen(tLfd, "w"); | |
1055 } | |
1056 if (power) { | |
1057 pLFile = fdopen(pLfd, "w"); | |
1058 } | |
1059 kLFile = fdopen(kLfd, "w"); | |
1060 | |
1061 char *fName = new char[256]; | |
1062 int ffd, pfd; | |
1063 FILE *tFile; | |
1064 unsigned pos = 0; | |
1065 lseek(dbfid, dbH->dataOffset, SEEK_SET); | |
1066 double *data_buffer; | |
1067 size_t data_buffer_size; | |
1068 for(unsigned k = 0; k < dbH->numFiles; k++) { | |
1069 fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLESIZE); | |
1070 snprintf(fName, 256, "%05d.features", k); | |
1071 if ((ffd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | |
1072 error("error creating feature file", fName, "open"); | |
1073 } | |
1074 if ((write(ffd, &dbH->dim, sizeof(uint32_t))) < 0) { | |
1075 error("error writing dimensions", fName, "write"); | |
1076 } | |
1077 | |
1078 /* FIXME: this repeated malloc()/free() of data buffers is | |
1079 inefficient. */ | |
1080 data_buffer_size = trackTable[k] * dbH->dim * sizeof(double); | |
1081 | |
1082 { | |
1083 void *tmp = malloc(data_buffer_size); | |
1084 if (tmp == NULL) { | |
1085 error("error allocating data buffer"); | |
1086 } | |
1087 data_buffer = (double *) tmp; | |
1088 } | |
1089 | |
1090 if ((read(dbfid, data_buffer, data_buffer_size)) != (ssize_t) data_buffer_size) { | |
1091 error("error reading data", fName, "read"); | |
1092 } | |
1093 | |
1094 if ((write(ffd, data_buffer, data_buffer_size)) < 0) { | |
1095 error("error writing data", fName, "write"); | |
1096 } | |
1097 | |
1098 free(data_buffer); | |
1099 | |
1100 fprintf(fLFile, "%s\n", fName); | |
1101 close(ffd); | |
1102 | |
1103 if (times) { | |
1104 snprintf(fName, 256, "%05d.times", k); | |
1105 tFile = fopen(fName, "w"); | |
1106 for(unsigned i = 0; i < trackTable[k]; i++) { | |
1107 // KLUDGE: specifying 16 digits of precision after the decimal | |
1108 // point is (but check this!) sufficient to uniquely identify | |
1109 // doubles; however, that will cause ugliness, as that's | |
1110 // vastly too many for most values of interest. Moving to %a | |
1111 // here and scanf() in the timesFile reading might fix this. | |
1112 // -- CSR, 2007-10-19 | |
1113 fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*i)); | |
1114 } | |
1115 fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*trackTable[k]-1)); | |
1116 | |
1117 fprintf(tLFile, "%s\n", fName); | |
1118 } | |
1119 | |
1120 if (power) { | |
1121 uint32_t one = 1; | |
1122 snprintf(fName, 256, "%05d.power", k); | |
1123 if ((pfd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | |
1124 error("error creating power file", fName, "open"); | |
1125 } | |
1126 if ((write(pfd, &one, sizeof(uint32_t))) < 0) { | |
1127 error("error writing one", fName, "write"); | |
1128 } | |
1129 if ((write(pfd, powerTable + pos, trackTable[k] * sizeof(double))) < 0) { | |
1130 error("error writing data", fName, "write"); | |
1131 } | |
1132 fprintf(pLFile, "%s\n", fName); | |
1133 close(pfd); | |
1134 } | |
1135 | |
1136 pos += trackTable[k]; | |
1137 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; | |
1138 } | |
1139 | |
1140 FILE *scriptFile; | |
1141 scriptFile = fopen("restore.sh", "w"); | |
1142 fprintf(scriptFile, "\ | |
1143 #! /bin/sh\n\ | |
1144 #\n\ | |
1145 # usage: AUDIODB=/path/to/audioDB sh ./restore.sh <newdb>\n\ | |
1146 \n\ | |
1147 if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n\ | |
1148 if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\ | |
1149 \"${AUDIODB}\" -d \"$1\" -N --size=%d\n", (int) (dbH->dbSize / 1000000)); | |
1150 if(dbH->flags & O2_FLAG_L2NORM) { | |
1151 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n"); | |
1152 } | |
1153 if(power) { | |
1154 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -P\n"); | |
1155 } | |
1156 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt"); | |
1157 if(times) { | |
1158 fprintf(scriptFile, " -T timesList.txt"); | |
1159 } | |
1160 if(power) { | |
1161 fprintf(scriptFile, " -W powerList.txt"); | |
1162 } | |
1163 fprintf(scriptFile, "\n"); | |
1164 fclose(scriptFile); | |
1165 | |
1166 if((chdir(cwd)) < 0) { | |
1167 error("error changing working directory", cwd, "chdir"); | |
1168 } | |
1169 | |
1170 fclose(fLFile); | |
1171 if(times) { | |
1172 fclose(tLFile); | |
1173 } | |
1174 if(power) { | |
1175 fclose(pLFile); | |
1176 } | |
1177 fclose(kLFile); | |
1178 delete[] fName; | |
1179 | |
1180 status(dbName); | |
1181 } | 391 } |
1182 | 392 |
1183 void audioDB::l2norm(const char* dbName) { | 393 void audioDB::l2norm(const char* dbName) { |
1184 forWrite = true; | 394 forWrite = true; |
1185 initTables(dbName, 0); | 395 initTables(dbName, 0); |
1202 } | 412 } |
1203 dbH->flags |= O2_FLAG_POWER; | 413 dbH->flags |= O2_FLAG_POWER; |
1204 memcpy(db, dbH, O2_HEADERSIZE); | 414 memcpy(db, dbH, O2_HEADERSIZE); |
1205 } | 415 } |
1206 | 416 |
1207 bool audioDB::powers_acceptable(double p1, double p2) { | |
1208 if (use_absolute_threshold) { | |
1209 if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) { | |
1210 return false; | |
1211 } | |
1212 } | |
1213 if (use_relative_threshold) { | |
1214 if (fabs(p1-p2) > fabs(relative_threshold)) { | |
1215 return false; | |
1216 } | |
1217 } | |
1218 return true; | |
1219 } | |
1220 | |
1221 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ | |
1222 switch(queryType){ | |
1223 case O2_POINT_QUERY: | |
1224 pointQuery(dbName, inFile, adbQueryResponse); | |
1225 break; | |
1226 case O2_SEQUENCE_QUERY: | |
1227 if(radius==0) | |
1228 trackSequenceQueryNN(dbName, inFile, adbQueryResponse); | |
1229 else | |
1230 trackSequenceQueryRad(dbName, inFile, adbQueryResponse); | |
1231 break; | |
1232 case O2_TRACK_QUERY: | |
1233 trackPointQuery(dbName, inFile, adbQueryResponse); | |
1234 break; | |
1235 default: | |
1236 error("unrecognized queryType in query()"); | |
1237 | |
1238 } | |
1239 } | |
1240 | |
1241 //return ordinal position of key in keyTable | |
1242 unsigned audioDB::getKeyPos(char* key){ | |
1243 for(unsigned k=0; k<dbH->numFiles; k++) | |
1244 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) | |
1245 return k; | |
1246 error("Key not found",key); | |
1247 return O2_ERR_KEYNOTFOUND; | |
1248 } | |
1249 | |
1250 // Basic point query engine | |
1251 void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { | |
1252 | |
1253 initTables(dbName, inFile); | |
1254 | |
1255 // For each input vector, find the closest pointNN matching output vectors and report | |
1256 // we use stdout in this stub version | |
1257 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
1258 | |
1259 double* query = (double*)(indata+sizeof(int)); | |
1260 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); | |
1261 double* data = dataBuf; | |
1262 double* queryCopy = 0; | |
1263 | |
1264 if( dbH->flags & O2_FLAG_L2NORM ){ | |
1265 // Make a copy of the query | |
1266 queryCopy = new double[numVectors*dbH->dim]; | |
1267 qNorm = new double[numVectors]; | |
1268 assert(queryCopy&&qNorm); | |
1269 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | |
1270 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | |
1271 query = queryCopy; | |
1272 } | |
1273 | |
1274 // Make temporary dynamic memory for results | |
1275 assert(pointNN>0 && pointNN<=O2_MAXNN); | |
1276 double distances[pointNN]; | |
1277 unsigned qIndexes[pointNN]; | |
1278 unsigned sIndexes[pointNN]; | |
1279 for(unsigned k=0; k<pointNN; k++){ | |
1280 distances[k]=-DBL_MAX; | |
1281 qIndexes[k]=~0; | |
1282 sIndexes[k]=~0; | |
1283 } | |
1284 | |
1285 unsigned j=numVectors; | |
1286 unsigned k,l,n; | |
1287 double thisDist; | |
1288 | |
1289 unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double)); | |
1290 double meanQdur = 0; | |
1291 double *timesdata = 0; | |
1292 double *querydurs = 0; | |
1293 double *dbdurs = 0; | |
1294 | |
1295 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | |
1296 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | |
1297 usingTimes=0; | |
1298 } | |
1299 | |
1300 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
1301 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | |
1302 | |
1303 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | |
1304 timesdata = new double[2*numVectors]; | |
1305 querydurs = new double[numVectors]; | |
1306 insertTimeStamps(numVectors, timesFile, timesdata); | |
1307 // Calculate durations of points | |
1308 for(k=0; k<numVectors-1; k++){ | |
1309 querydurs[k]=timesdata[2*k+1]-timesdata[2*k]; | |
1310 meanQdur+=querydurs[k]; | |
1311 } | |
1312 meanQdur/=k; | |
1313 // Individual exhaustive timepoint durations | |
1314 dbdurs = new double[totalVecs]; | |
1315 for(k=0; k<totalVecs-1; k++) { | |
1316 dbdurs[k]=timesTable[2*k+1]-timesTable[2*k]; | |
1317 } | |
1318 } | |
1319 | |
1320 if(usingQueryPoint) | |
1321 if(queryPoint>numVectors-1) | |
1322 error("queryPoint > numVectors in query"); | |
1323 else{ | |
1324 if(verbosity>1) { | |
1325 cerr << "query point: " << queryPoint << endl; cerr.flush(); | |
1326 } | |
1327 query=query+queryPoint*dbH->dim; | |
1328 numVectors=queryPoint+1; | |
1329 j=1; | |
1330 } | |
1331 | |
1332 gettimeofday(&tv1, NULL); | |
1333 while(j--){ // query | |
1334 data=dataBuf; | |
1335 k=totalVecs; // number of database vectors | |
1336 while(k--){ // database | |
1337 thisDist=0; | |
1338 l=dbH->dim; | |
1339 double* q=query; | |
1340 while(l--) | |
1341 thisDist+=*q++**data++; | |
1342 if(!usingTimes || | |
1343 (usingTimes | |
1344 && fabs(dbdurs[totalVecs-k-1]-querydurs[numVectors-j-1])<querydurs[numVectors-j-1]*timesTol)){ | |
1345 n=pointNN; | |
1346 while(n--){ | |
1347 if(thisDist>=distances[n]){ | |
1348 if((n==0 || thisDist<=distances[n-1])){ | |
1349 // Copy all values above up the queue | |
1350 for( l=pointNN-1 ; l >= n+1 ; l--){ | |
1351 distances[l]=distances[l-1]; | |
1352 qIndexes[l]=qIndexes[l-1]; | |
1353 sIndexes[l]=sIndexes[l-1]; | |
1354 } | |
1355 distances[n]=thisDist; | |
1356 qIndexes[n]=numVectors-j-1; | |
1357 sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1; | |
1358 break; | |
1359 } | |
1360 } | |
1361 else | |
1362 break; | |
1363 } | |
1364 } | |
1365 } | |
1366 // Move query pointer to next query point | |
1367 query+=dbH->dim; | |
1368 } | |
1369 | |
1370 gettimeofday(&tv2, NULL); | |
1371 if(verbosity>1) { | |
1372 cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | |
1373 } | |
1374 | |
1375 if(adbQueryResponse==0){ | |
1376 // Output answer | |
1377 // Loop over nearest neighbours | |
1378 for(k=0; k < pointNN; k++){ | |
1379 // Scan for key | |
1380 unsigned cumTrack=0; | |
1381 for(l=0 ; l<dbH->numFiles; l++){ | |
1382 cumTrack+=trackTable[l]; | |
1383 if(sIndexes[k]<cumTrack){ | |
1384 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " " | |
1385 << sIndexes[k]+trackTable[l]-cumTrack << endl; | |
1386 break; | |
1387 } | |
1388 } | |
1389 } | |
1390 } | |
1391 else{ // Process Web Services Query | |
1392 int listLen; | |
1393 for(k = 0; k < pointNN; k++) { | |
1394 if(distances[k] == -DBL_MAX) | |
1395 break; | |
1396 } | |
1397 listLen = k; | |
1398 | |
1399 adbQueryResponse->result.__sizeRlist=listLen; | |
1400 adbQueryResponse->result.__sizeDist=listLen; | |
1401 adbQueryResponse->result.__sizeQpos=listLen; | |
1402 adbQueryResponse->result.__sizeSpos=listLen; | |
1403 adbQueryResponse->result.Rlist= new char*[listLen]; | |
1404 adbQueryResponse->result.Dist = new double[listLen]; | |
1405 adbQueryResponse->result.Qpos = new unsigned int[listLen]; | |
1406 adbQueryResponse->result.Spos = new unsigned int[listLen]; | |
1407 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ | |
1408 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; | |
1409 adbQueryResponse->result.Dist[k]=distances[k]; | |
1410 adbQueryResponse->result.Qpos[k]=qIndexes[k]; | |
1411 unsigned cumTrack=0; | |
1412 for(l=0 ; l<dbH->numFiles; l++){ | |
1413 cumTrack+=trackTable[l]; | |
1414 if(sIndexes[k]<cumTrack){ | |
1415 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); | |
1416 break; | |
1417 } | |
1418 } | |
1419 adbQueryResponse->result.Spos[k]=sIndexes[k]+trackTable[l]-cumTrack; | |
1420 } | |
1421 } | |
1422 | |
1423 // Clean up | |
1424 if(queryCopy) | |
1425 delete queryCopy; | |
1426 if(qNorm) | |
1427 delete qNorm; | |
1428 if(timesdata) | |
1429 delete[] timesdata; | |
1430 if(querydurs) | |
1431 delete[] querydurs; | |
1432 if(dbdurs) | |
1433 delete dbdurs; | |
1434 } | |
1435 | |
1436 // trackPointQuery | |
1437 // return the trackNN closest tracks to the query track | |
1438 // uses average of pointNN points per track | |
1439 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { | |
1440 initTables(dbName, inFile); | |
1441 | |
1442 // For each input vector, find the closest pointNN matching output vectors and report | |
1443 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
1444 double* query = (double*)(indata+sizeof(int)); | |
1445 double* data; | |
1446 double* queryCopy = 0; | |
1447 | |
1448 if( dbH->flags & O2_FLAG_L2NORM ){ | |
1449 // Make a copy of the query | |
1450 queryCopy = new double[numVectors*dbH->dim]; | |
1451 qNorm = new double[numVectors]; | |
1452 assert(queryCopy&&qNorm); | |
1453 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | |
1454 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | |
1455 query = queryCopy; | |
1456 } | |
1457 | |
1458 assert(pointNN>0 && pointNN<=O2_MAXNN); | |
1459 assert(trackNN>0 && trackNN<=O2_MAXNN); | |
1460 | |
1461 // Make temporary dynamic memory for results | |
1462 double trackDistances[trackNN]; | |
1463 unsigned trackIDs[trackNN]; | |
1464 unsigned trackQIndexes[trackNN]; | |
1465 unsigned trackSIndexes[trackNN]; | |
1466 | |
1467 double distances[pointNN]; | |
1468 unsigned qIndexes[pointNN]; | |
1469 unsigned sIndexes[pointNN]; | |
1470 | |
1471 unsigned j=numVectors; // number of query points | |
1472 unsigned k,l,n, track, trackOffset=0, processedTracks=0; | |
1473 double thisDist; | |
1474 | |
1475 for(k=0; k<pointNN; k++){ | |
1476 distances[k]=-DBL_MAX; | |
1477 qIndexes[k]=~0; | |
1478 sIndexes[k]=~0; | |
1479 } | |
1480 | |
1481 for(k=0; k<trackNN; k++){ | |
1482 trackDistances[k]=-DBL_MAX; | |
1483 trackQIndexes[k]=~0; | |
1484 trackSIndexes[k]=~0; | |
1485 trackIDs[k]=~0; | |
1486 } | |
1487 | |
1488 double meanQdur = 0; | |
1489 double *timesdata = 0; | |
1490 double *querydurs = 0; | |
1491 double *meanDBdur = 0; | |
1492 | |
1493 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | |
1494 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | |
1495 usingTimes=0; | |
1496 } | |
1497 | |
1498 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
1499 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | |
1500 | |
1501 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | |
1502 timesdata = new double[2*numVectors]; | |
1503 querydurs = new double[numVectors]; | |
1504 insertTimeStamps(numVectors, timesFile, timesdata); | |
1505 // Calculate durations of points | |
1506 for(k=0; k<numVectors-1; k++) { | |
1507 querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; | |
1508 meanQdur += querydurs[k]; | |
1509 } | |
1510 meanQdur/=k; | |
1511 meanDBdur = new double[dbH->numFiles]; | |
1512 for(k=0; k<dbH->numFiles; k++){ | |
1513 meanDBdur[k]=0.0; | |
1514 for(j=0; j<trackTable[k]-1 ; j++) { | |
1515 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j]; | |
1516 } | |
1517 meanDBdur[k]/=j; | |
1518 } | |
1519 } | |
1520 | |
1521 if(usingQueryPoint) | |
1522 if(queryPoint>numVectors-1) | |
1523 error("queryPoint > numVectors in query"); | |
1524 else{ | |
1525 if(verbosity>1) { | |
1526 cerr << "query point: " << queryPoint << endl; cerr.flush(); | |
1527 } | |
1528 query=query+queryPoint*dbH->dim; | |
1529 numVectors=queryPoint+1; | |
1530 } | |
1531 | |
1532 // build track offset table | |
1533 off_t *trackOffsetTable = new off_t[dbH->numFiles]; | |
1534 unsigned cumTrack=0; | |
1535 off_t trackIndexOffset; | |
1536 for(k=0; k<dbH->numFiles;k++){ | |
1537 trackOffsetTable[k]=cumTrack; | |
1538 cumTrack+=trackTable[k]*dbH->dim; | |
1539 } | |
1540 | |
1541 char nextKey[MAXSTR]; | |
1542 | |
1543 gettimeofday(&tv1, NULL); | |
1544 | |
1545 size_t data_buffer_size = 0; | |
1546 double *data_buffer = 0; | |
1547 lseek(dbfid, dbH->dataOffset, SEEK_SET); | |
1548 | |
1549 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ | |
1550 | |
1551 trackOffset = trackOffsetTable[track]; // numDoubles offset | |
1552 | |
1553 // get trackID from file if using a control file | |
1554 if(trackFile) { | |
1555 trackFile->getline(nextKey,MAXSTR); | |
1556 if(!trackFile->eof()) { | |
1557 track = getKeyPos(nextKey); | |
1558 trackOffset = trackOffsetTable[track]; | |
1559 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); | |
1560 } else { | |
1561 break; | |
1562 } | |
1563 } | |
1564 | |
1565 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset | |
1566 | |
1567 if(verbosity>7) { | |
1568 cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush(); | |
1569 } | |
1570 | |
1571 if(dbH->flags & O2_FLAG_L2NORM) | |
1572 usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy; | |
1573 else | |
1574 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); | |
1575 if(usingQueryPoint) | |
1576 j=1; | |
1577 else | |
1578 j=numVectors; | |
1579 | |
1580 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) { | |
1581 if(data_buffer) { | |
1582 free(data_buffer); | |
1583 } | |
1584 { | |
1585 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; | |
1586 void *tmp = malloc(data_buffer_size); | |
1587 if (tmp == NULL) { | |
1588 error("error allocating data buffer"); | |
1589 } | |
1590 data_buffer = (double *) tmp; | |
1591 } | |
1592 } | |
1593 | |
1594 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); | |
1595 | |
1596 while(j--){ | |
1597 k=trackTable[track]; // number of vectors in track | |
1598 data=data_buffer; // data for track | |
1599 while(k--){ | |
1600 thisDist=0; | |
1601 l=dbH->dim; | |
1602 double* q=query; | |
1603 while(l--) | |
1604 thisDist+=*q++**data++; | |
1605 if(!usingTimes || | |
1606 (usingTimes | |
1607 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){ | |
1608 n=pointNN; | |
1609 while(n--){ | |
1610 if(thisDist>=distances[n]){ | |
1611 if((n==0 || thisDist<=distances[n-1])){ | |
1612 // Copy all values above up the queue | |
1613 for( l=pointNN-1 ; l > n ; l--){ | |
1614 distances[l]=distances[l-1]; | |
1615 qIndexes[l]=qIndexes[l-1]; | |
1616 sIndexes[l]=sIndexes[l-1]; | |
1617 } | |
1618 distances[n]=thisDist; | |
1619 qIndexes[n]=numVectors-j-1; | |
1620 sIndexes[n]=trackTable[track]-k-1; | |
1621 break; | |
1622 } | |
1623 } | |
1624 else | |
1625 break; | |
1626 } | |
1627 } | |
1628 } // track | |
1629 // Move query pointer to next query point | |
1630 query+=dbH->dim; | |
1631 } // query | |
1632 // Take the average of this track's distance | |
1633 // Test the track distances | |
1634 thisDist=0; | |
1635 for (n = 0; n < pointNN; n++) { | |
1636 if (distances[n] == -DBL_MAX) break; | |
1637 thisDist += distances[n]; | |
1638 } | |
1639 thisDist /= n; | |
1640 | |
1641 n=trackNN; | |
1642 while(n--){ | |
1643 if(thisDist>=trackDistances[n]){ | |
1644 if((n==0 || thisDist<=trackDistances[n-1])){ | |
1645 // Copy all values above up the queue | |
1646 for( l=trackNN-1 ; l > n ; l--){ | |
1647 trackDistances[l]=trackDistances[l-1]; | |
1648 trackQIndexes[l]=trackQIndexes[l-1]; | |
1649 trackSIndexes[l]=trackSIndexes[l-1]; | |
1650 trackIDs[l]=trackIDs[l-1]; | |
1651 } | |
1652 trackDistances[n]=thisDist; | |
1653 trackQIndexes[n]=qIndexes[0]; | |
1654 trackSIndexes[n]=sIndexes[0]; | |
1655 trackIDs[n]=track; | |
1656 break; | |
1657 } | |
1658 } | |
1659 else | |
1660 break; | |
1661 } | |
1662 for(unsigned k=0; k<pointNN; k++){ | |
1663 distances[k]=-DBL_MAX; | |
1664 qIndexes[k]=~0; | |
1665 sIndexes[k]=~0; | |
1666 } | |
1667 } // tracks | |
1668 | |
1669 free(data_buffer); | |
1670 | |
1671 gettimeofday(&tv2, NULL); | |
1672 | |
1673 if(verbosity>1) { | |
1674 cerr << endl << "processed tracks :" << processedTracks | |
1675 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | |
1676 } | |
1677 | |
1678 if(adbQueryResponse==0){ | |
1679 if(verbosity>1) { | |
1680 cerr<<endl; | |
1681 } | |
1682 // Output answer | |
1683 // Loop over nearest neighbours | |
1684 for(k=0; k < min(trackNN,processedTracks); k++) | |
1685 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE | |
1686 << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << endl; | |
1687 } | |
1688 else{ // Process Web Services Query | |
1689 int listLen = min(trackNN, processedTracks); | |
1690 adbQueryResponse->result.__sizeRlist=listLen; | |
1691 adbQueryResponse->result.__sizeDist=listLen; | |
1692 adbQueryResponse->result.__sizeQpos=listLen; | |
1693 adbQueryResponse->result.__sizeSpos=listLen; | |
1694 adbQueryResponse->result.Rlist= new char*[listLen]; | |
1695 adbQueryResponse->result.Dist = new double[listLen]; | |
1696 adbQueryResponse->result.Qpos = new unsigned int[listLen]; | |
1697 adbQueryResponse->result.Spos = new unsigned int[listLen]; | |
1698 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ | |
1699 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; | |
1700 adbQueryResponse->result.Dist[k]=trackDistances[k]; | |
1701 adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; | |
1702 adbQueryResponse->result.Spos[k]=trackSIndexes[k]; | |
1703 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); | |
1704 } | |
1705 } | |
1706 | |
1707 // Clean up | |
1708 if(trackOffsetTable) | |
1709 delete trackOffsetTable; | |
1710 if(queryCopy) | |
1711 delete queryCopy; | |
1712 if(qNorm) | |
1713 delete qNorm; | |
1714 if(timesdata) | |
1715 delete[] timesdata; | |
1716 if(querydurs) | |
1717 delete[] querydurs; | |
1718 if(meanDBdur) | |
1719 delete meanDBdur; | |
1720 } | |
1721 | |
1722 // This is a common pattern in sequence queries: what we are doing is | |
1723 // taking a window of length seqlen over a buffer of length length, | |
1724 // and placing the sum of the elements in that window in the first | |
1725 // element of the window: thus replacing all but the last seqlen | |
1726 // elements in the buffer the corresponding windowed sum. | |
1727 void audioDB::sequence_sum(double *buffer, int length, int seqlen) { | |
1728 double tmp1, tmp2, *ps; | |
1729 int j, w; | |
1730 | |
1731 tmp1 = *buffer; | |
1732 j = 1; | |
1733 w = seqlen - 1; | |
1734 while(w--) { | |
1735 *buffer += buffer[j++]; | |
1736 } | |
1737 ps = buffer + 1; | |
1738 w = length - seqlen; // +1 - 1 | |
1739 while(w--) { | |
1740 tmp2 = *ps; | |
1741 *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1); | |
1742 tmp1 = tmp2; | |
1743 ps++; | |
1744 } | |
1745 } | |
1746 | |
1747 void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) { | |
1748 int w = length - seqlen + 1; | |
1749 while(w--) { | |
1750 *buffer = sqrt(*buffer); | |
1751 buffer++; | |
1752 } | |
1753 } | |
1754 | |
1755 void audioDB::sequence_average(double *buffer, int length, int seqlen) { | |
1756 int w = length - seqlen + 1; | |
1757 while(w--) { | |
1758 *buffer /= seqlen; | |
1759 buffer++; | |
1760 } | |
1761 } | |
1762 | |
1763 // k nearest-neighbor (k-NN) search between query and target tracks | |
1764 // efficient implementation based on matched filter | |
1765 // assumes normed shingles | |
1766 // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track | |
1767 void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ | |
1768 | |
1769 initTables(dbName, inFile); | |
1770 | |
1771 // For each input vector, find the closest pointNN matching output vectors and report | |
1772 // we use stdout in this stub version | |
1773 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
1774 double* query = (double*)(indata+sizeof(int)); | |
1775 double* queryCopy = 0; | |
1776 | |
1777 if(!(dbH->flags & O2_FLAG_L2NORM) ) | |
1778 error("Database must be L2 normed for sequence query","use -L2NORM"); | |
1779 | |
1780 if(numVectors<sequenceLength) | |
1781 error("Query shorter than requested sequence length", "maybe use -l"); | |
1782 | |
1783 if(verbosity>1) { | |
1784 cerr << "performing norms ... "; cerr.flush(); | |
1785 } | |
1786 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); | |
1787 | |
1788 // Make a copy of the query | |
1789 queryCopy = new double[numVectors*dbH->dim]; | |
1790 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | |
1791 qNorm = new double[numVectors]; | |
1792 sNorm = new double[dbVectors]; | |
1793 assert(qNorm&&sNorm&&queryCopy&&sequenceLength); | |
1794 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | |
1795 query = queryCopy; | |
1796 | |
1797 // Make norm measurements relative to sequenceLength | |
1798 unsigned w = sequenceLength-1; | |
1799 unsigned i,j; | |
1800 | |
1801 // Copy the L2 norm values to core to avoid disk random access later on | |
1802 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); | |
1803 double* qnPtr = qNorm; | |
1804 double* snPtr = sNorm; | |
1805 | |
1806 double *sPower = 0, *qPower = 0; | |
1807 double *spPtr = 0, *qpPtr = 0; | |
1808 | |
1809 if (usingPower) { | |
1810 if (!(dbH->flags & O2_FLAG_POWER)) { | |
1811 error("database not power-enabled", dbName); | |
1812 } | |
1813 sPower = new double[dbVectors]; | |
1814 spPtr = sPower; | |
1815 memcpy(sPower, powerTable, dbVectors * sizeof(double)); | |
1816 } | |
1817 | |
1818 for(i=0; i<dbH->numFiles; i++){ | |
1819 if(trackTable[i]>=sequenceLength) { | |
1820 sequence_sum(snPtr, trackTable[i], sequenceLength); | |
1821 sequence_sqrt(snPtr, trackTable[i], sequenceLength); | |
1822 | |
1823 if (usingPower) { | |
1824 sequence_sum(spPtr, trackTable[i], sequenceLength); | |
1825 sequence_average(spPtr, trackTable[i], sequenceLength); | |
1826 } | |
1827 } | |
1828 snPtr += trackTable[i]; | |
1829 if (usingPower) { | |
1830 spPtr += trackTable[i]; | |
1831 } | |
1832 } | |
1833 | |
1834 sequence_sum(qnPtr, numVectors, sequenceLength); | |
1835 sequence_sqrt(qnPtr, numVectors, sequenceLength); | |
1836 | |
1837 if (usingPower) { | |
1838 qPower = new double[numVectors]; | |
1839 qpPtr = qPower; | |
1840 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { | |
1841 error("error seeking to data", powerFileName, "lseek"); | |
1842 } | |
1843 int count = read(powerfd, qPower, numVectors * sizeof(double)); | |
1844 if (count == -1) { | |
1845 error("error reading data", powerFileName, "read"); | |
1846 } | |
1847 if ((unsigned) count != numVectors * sizeof(double)) { | |
1848 error("short read", powerFileName); | |
1849 } | |
1850 | |
1851 sequence_sum(qpPtr, numVectors, sequenceLength); | |
1852 sequence_average(qpPtr, numVectors, sequenceLength); | |
1853 } | |
1854 | |
1855 if(verbosity>1) { | |
1856 cerr << "done." << endl; | |
1857 } | |
1858 | |
1859 if(verbosity>1) { | |
1860 cerr << "matching tracks..." << endl; | |
1861 } | |
1862 | |
1863 assert(pointNN>0 && pointNN<=O2_MAXNN); | |
1864 assert(trackNN>0 && trackNN<=O2_MAXNN); | |
1865 | |
1866 // Make temporary dynamic memory for results | |
1867 double trackDistances[trackNN]; | |
1868 unsigned trackIDs[trackNN]; | |
1869 unsigned trackQIndexes[trackNN]; | |
1870 unsigned trackSIndexes[trackNN]; | |
1871 | |
1872 double distances[pointNN]; | |
1873 unsigned qIndexes[pointNN]; | |
1874 unsigned sIndexes[pointNN]; | |
1875 | |
1876 | |
1877 unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; | |
1878 double thisDist; | |
1879 | |
1880 for(k=0; k<pointNN; k++){ | |
1881 distances[k]=1.0e6; | |
1882 qIndexes[k]=~0; | |
1883 sIndexes[k]=~0; | |
1884 } | |
1885 | |
1886 for(k=0; k<trackNN; k++){ | |
1887 trackDistances[k]=1.0e6; | |
1888 trackQIndexes[k]=~0; | |
1889 trackSIndexes[k]=~0; | |
1890 trackIDs[k]=~0; | |
1891 } | |
1892 | |
1893 // Timestamp and durations processing | |
1894 double meanQdur = 0; | |
1895 double *timesdata = 0; | |
1896 double *querydurs = 0; | |
1897 double *meanDBdur = 0; | |
1898 | |
1899 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | |
1900 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | |
1901 usingTimes=0; | |
1902 } | |
1903 | |
1904 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
1905 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | |
1906 | |
1907 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | |
1908 timesdata = new double[2*numVectors]; | |
1909 querydurs = new double[numVectors]; | |
1910 | |
1911 insertTimeStamps(numVectors, timesFile, timesdata); | |
1912 // Calculate durations of points | |
1913 for(k=0; k<numVectors-1; k++) { | |
1914 querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; | |
1915 meanQdur += querydurs[k]; | |
1916 } | |
1917 meanQdur/=k; | |
1918 if(verbosity>1) { | |
1919 cerr << "mean query file duration: " << meanQdur << endl; | |
1920 } | |
1921 meanDBdur = new double[dbH->numFiles]; | |
1922 assert(meanDBdur); | |
1923 for(k=0; k<dbH->numFiles; k++){ | |
1924 meanDBdur[k]=0.0; | |
1925 for(j=0; j<trackTable[k]-1 ; j++) { | |
1926 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j]; | |
1927 } | |
1928 meanDBdur[k]/=j; | |
1929 } | |
1930 } | |
1931 | |
1932 if(usingQueryPoint) | |
1933 if(queryPoint>numVectors || queryPoint>numVectors-wL+1) | |
1934 error("queryPoint > numVectors-wL+1 in query"); | |
1935 else{ | |
1936 if(verbosity>1) { | |
1937 cerr << "query point: " << queryPoint << endl; cerr.flush(); | |
1938 } | |
1939 query = query + queryPoint * dbH->dim; | |
1940 qnPtr = qnPtr + queryPoint; | |
1941 if (usingPower) { | |
1942 qpPtr = qpPtr + queryPoint; | |
1943 } | |
1944 numVectors=wL; | |
1945 } | |
1946 | |
1947 double ** D = 0; // Differences query and target | |
1948 double ** DD = 0; // Matched filter distance | |
1949 | |
1950 D = new double*[numVectors]; | |
1951 assert(D); | |
1952 DD = new double*[numVectors]; | |
1953 assert(DD); | |
1954 | |
1955 gettimeofday(&tv1, NULL); | |
1956 unsigned processedTracks = 0; | |
1957 unsigned successfulTracks=0; | |
1958 | |
1959 double* qp; | |
1960 double* sp; | |
1961 double* dp; | |
1962 | |
1963 // build track offset table | |
1964 off_t *trackOffsetTable = new off_t[dbH->numFiles]; | |
1965 unsigned cumTrack=0; | |
1966 off_t trackIndexOffset; | |
1967 for(k=0; k<dbH->numFiles;k++){ | |
1968 trackOffsetTable[k]=cumTrack; | |
1969 cumTrack+=trackTable[k]*dbH->dim; | |
1970 } | |
1971 | |
1972 char nextKey [MAXSTR]; | |
1973 | |
1974 // chi^2 statistics | |
1975 double sampleCount = 0; | |
1976 double sampleSum = 0; | |
1977 double logSampleSum = 0; | |
1978 double minSample = 1e9; | |
1979 double maxSample = 0; | |
1980 | |
1981 // Track loop | |
1982 size_t data_buffer_size = 0; | |
1983 double *data_buffer = 0; | |
1984 lseek(dbfid, dbH->dataOffset, SEEK_SET); | |
1985 | |
1986 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) { | |
1987 | |
1988 trackOffset = trackOffsetTable[track]; // numDoubles offset | |
1989 | |
1990 // get trackID from file if using a control file | |
1991 if(trackFile) { | |
1992 trackFile->getline(nextKey,MAXSTR); | |
1993 if(!trackFile->eof()) { | |
1994 track = getKeyPos(nextKey); | |
1995 trackOffset = trackOffsetTable[track]; | |
1996 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); | |
1997 } else { | |
1998 break; | |
1999 } | |
2000 } | |
2001 | |
2002 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset | |
2003 | |
2004 if(sequenceLength<=trackTable[track]){ // test for short sequences | |
2005 | |
2006 if(verbosity>7) { | |
2007 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); | |
2008 } | |
2009 | |
2010 // Sum products matrix | |
2011 for(j=0; j<numVectors;j++){ | |
2012 D[j]=new double[trackTable[track]]; | |
2013 assert(D[j]); | |
2014 | |
2015 } | |
2016 | |
2017 // Matched filter matrix | |
2018 for(j=0; j<numVectors;j++){ | |
2019 DD[j]=new double[trackTable[track]]; | |
2020 assert(DD[j]); | |
2021 } | |
2022 | |
2023 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) { | |
2024 if(data_buffer) { | |
2025 free(data_buffer); | |
2026 } | |
2027 { | |
2028 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; | |
2029 void *tmp = malloc(data_buffer_size); | |
2030 if (tmp == NULL) { | |
2031 error("error allocating data buffer"); | |
2032 } | |
2033 data_buffer = (double *) tmp; | |
2034 } | |
2035 } | |
2036 | |
2037 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); | |
2038 | |
2039 // Dot product | |
2040 for(j=0; j<numVectors; j++) | |
2041 for(k=0; k<trackTable[track]; k++){ | |
2042 qp=query+j*dbH->dim; | |
2043 sp=data_buffer+k*dbH->dim; | |
2044 DD[j][k]=0.0; // Initialize matched filter array | |
2045 dp=&D[j][k]; // point to correlation cell j,k | |
2046 *dp=0.0; // initialize correlation cell | |
2047 l=dbH->dim; // size of vectors | |
2048 while(l--) | |
2049 *dp+=*qp++**sp++; | |
2050 } | |
2051 | |
2052 // Matched Filter | |
2053 // HOP SIZE == 1 | |
2054 double* spd; | |
2055 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop | |
2056 for(w=0; w<wL; w++) | |
2057 for(j=0; j<numVectors-w; j++){ | |
2058 sp=DD[j]; | |
2059 spd=D[j+w]+w; | |
2060 k=trackTable[track]-w; | |
2061 while(k--) | |
2062 *sp+++=*spd++; | |
2063 } | |
2064 } | |
2065 | |
2066 else{ // HOP_SIZE != 1 | |
2067 for(w=0; w<wL; w++) | |
2068 for(j=0; j<numVectors-w; j+=HOP_SIZE){ | |
2069 sp=DD[j]; | |
2070 spd=D[j+w]+w; | |
2071 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){ | |
2072 *sp+=*spd; | |
2073 sp+=HOP_SIZE; | |
2074 spd+=HOP_SIZE; | |
2075 } | |
2076 } | |
2077 } | |
2078 | |
2079 if(verbosity>3 && usingTimes) { | |
2080 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl; | |
2081 cerr.flush(); | |
2082 } | |
2083 | |
2084 if(!usingTimes || | |
2085 (usingTimes | |
2086 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){ | |
2087 | |
2088 if(verbosity>3 && usingTimes) { | |
2089 cerr << "within duration tolerance." << endl; | |
2090 cerr.flush(); | |
2091 } | |
2092 | |
2093 // Search for minimum distance by shingles (concatenated vectors) | |
2094 for(j=0;j<=numVectors-wL;j+=HOP_SIZE) | |
2095 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ | |
2096 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; | |
2097 if(verbosity>9) { | |
2098 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl; | |
2099 } | |
2100 // Gather chi^2 statistics | |
2101 if(thisDist<minSample) | |
2102 minSample=thisDist; | |
2103 else if(thisDist>maxSample) | |
2104 maxSample=thisDist; | |
2105 if(thisDist>1e-9){ | |
2106 sampleCount++; | |
2107 sampleSum+=thisDist; | |
2108 logSampleSum+=log(thisDist); | |
2109 } | |
2110 | |
2111 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); | |
2112 // Power test | |
2113 if (usingPower) { | |
2114 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) { | |
2115 thisDist = 1000000.0; | |
2116 } | |
2117 } | |
2118 | |
2119 // k-NN match algorithm | |
2120 m=pointNN; | |
2121 while(m--){ | |
2122 if(thisDist<=distances[m]) | |
2123 if(m==0 || thisDist>=distances[m-1]){ | |
2124 // Shuffle distances up the list | |
2125 for(l=pointNN-1; l>m; l--){ | |
2126 distances[l]=distances[l-1]; | |
2127 qIndexes[l]=qIndexes[l-1]; | |
2128 sIndexes[l]=sIndexes[l-1]; | |
2129 } | |
2130 distances[m]=thisDist; | |
2131 if(usingQueryPoint) | |
2132 qIndexes[m]=queryPoint; | |
2133 else | |
2134 qIndexes[m]=j; | |
2135 sIndexes[m]=k; | |
2136 break; | |
2137 } | |
2138 } | |
2139 } | |
2140 // Calculate the mean of the N-Best matches | |
2141 thisDist=0.0; | |
2142 for(m=0; m<pointNN; m++) { | |
2143 if (distances[m] == 1000000.0) break; | |
2144 thisDist+=distances[m]; | |
2145 } | |
2146 thisDist/=m; | |
2147 | |
2148 // Let's see the distances then... | |
2149 if(verbosity>3) { | |
2150 cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl; | |
2151 } | |
2152 | |
2153 | |
2154 // All the track stuff goes here | |
2155 n=trackNN; | |
2156 while(n--){ | |
2157 if(thisDist<=trackDistances[n]){ | |
2158 if((n==0 || thisDist>=trackDistances[n-1])){ | |
2159 // Copy all values above up the queue | |
2160 for( l=trackNN-1 ; l > n ; l--){ | |
2161 trackDistances[l]=trackDistances[l-1]; | |
2162 trackQIndexes[l]=trackQIndexes[l-1]; | |
2163 trackSIndexes[l]=trackSIndexes[l-1]; | |
2164 trackIDs[l]=trackIDs[l-1]; | |
2165 } | |
2166 trackDistances[n]=thisDist; | |
2167 trackQIndexes[n]=qIndexes[0]; | |
2168 trackSIndexes[n]=sIndexes[0]; | |
2169 successfulTracks++; | |
2170 trackIDs[n]=track; | |
2171 break; | |
2172 } | |
2173 } | |
2174 else | |
2175 break; | |
2176 } | |
2177 } // Duration match | |
2178 | |
2179 // Clean up current track | |
2180 if(D!=NULL){ | |
2181 for(j=0; j<numVectors; j++) | |
2182 delete[] D[j]; | |
2183 } | |
2184 | |
2185 if(DD!=NULL){ | |
2186 for(j=0; j<numVectors; j++) | |
2187 delete[] DD[j]; | |
2188 } | |
2189 } | |
2190 // per-track reset array values | |
2191 for(unsigned k=0; k<pointNN; k++){ | |
2192 distances[k]=1.0e6; | |
2193 qIndexes[k]=~0; | |
2194 sIndexes[k]=~0; | |
2195 } | |
2196 } | |
2197 | |
2198 free(data_buffer); | |
2199 | |
2200 gettimeofday(&tv2,NULL); | |
2201 if(verbosity>1) { | |
2202 cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" | |
2203 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | |
2204 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum | |
2205 << " minSample: " << minSample << " maxSample: " << maxSample << endl; | |
2206 } | |
2207 if(adbQueryResponse==0){ | |
2208 if(verbosity>1) { | |
2209 cerr<<endl; | |
2210 } | |
2211 // Output answer | |
2212 // Loop over nearest neighbours | |
2213 for(k=0; k < min(trackNN,successfulTracks); k++) | |
2214 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << " " | |
2215 << trackQIndexes[k] << " " << trackSIndexes[k] << endl; | |
2216 } | |
2217 else{ // Process Web Services Query | |
2218 int listLen = min(trackNN, processedTracks); | |
2219 adbQueryResponse->result.__sizeRlist=listLen; | |
2220 adbQueryResponse->result.__sizeDist=listLen; | |
2221 adbQueryResponse->result.__sizeQpos=listLen; | |
2222 adbQueryResponse->result.__sizeSpos=listLen; | |
2223 adbQueryResponse->result.Rlist= new char*[listLen]; | |
2224 adbQueryResponse->result.Dist = new double[listLen]; | |
2225 adbQueryResponse->result.Qpos = new unsigned int[listLen]; | |
2226 adbQueryResponse->result.Spos = new unsigned int[listLen]; | |
2227 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ | |
2228 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; | |
2229 adbQueryResponse->result.Dist[k]=trackDistances[k]; | |
2230 adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; | |
2231 adbQueryResponse->result.Spos[k]=trackSIndexes[k]; | |
2232 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); | |
2233 } | |
2234 } | |
2235 | |
2236 // Clean up | |
2237 if(trackOffsetTable) | |
2238 delete[] trackOffsetTable; | |
2239 if(queryCopy) | |
2240 delete[] queryCopy; | |
2241 if(qNorm) | |
2242 delete[] qNorm; | |
2243 if(sNorm) | |
2244 delete[] sNorm; | |
2245 if(qPower) | |
2246 delete[] qPower; | |
2247 if(sPower) | |
2248 delete[] sPower; | |
2249 if(D) | |
2250 delete[] D; | |
2251 if(DD) | |
2252 delete[] DD; | |
2253 if(timesdata) | |
2254 delete[] timesdata; | |
2255 if(querydurs) | |
2256 delete[] querydurs; | |
2257 if(meanDBdur) | |
2258 delete[] meanDBdur; | |
2259 } | |
2260 | |
2261 // Radius search between query and target tracks | |
2262 // efficient implementation based on matched filter | |
2263 // assumes normed shingles | |
2264 // outputs count of retrieved shingles, max retreived = one shingle per query shingle per track | |
2265 void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ | |
2266 | |
2267 initTables(dbName, inFile); | |
2268 | |
2269 // For each input vector, find the closest pointNN matching output vectors and report | |
2270 // we use stdout in this stub version | |
2271 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
2272 double* query = (double*)(indata+sizeof(int)); | |
2273 double* queryCopy = 0; | |
2274 | |
2275 if(!(dbH->flags & O2_FLAG_L2NORM) ) | |
2276 error("Database must be L2 normed for sequence query","use -l2norm"); | |
2277 | |
2278 if(verbosity>1) { | |
2279 cerr << "performing norms ... "; cerr.flush(); | |
2280 } | |
2281 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); | |
2282 | |
2283 // Make a copy of the query | |
2284 queryCopy = new double[numVectors*dbH->dim]; | |
2285 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | |
2286 qNorm = new double[numVectors]; | |
2287 sNorm = new double[dbVectors]; | |
2288 assert(qNorm&&sNorm&&queryCopy&&sequenceLength); | |
2289 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | |
2290 query = queryCopy; | |
2291 | |
2292 // Make norm measurements relative to sequenceLength | |
2293 unsigned w = sequenceLength-1; | |
2294 unsigned i,j; | |
2295 | |
2296 // Copy the L2 norm values to core to avoid disk random access later on | |
2297 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); | |
2298 double* snPtr = sNorm; | |
2299 double* qnPtr = qNorm; | |
2300 | |
2301 double *sPower = 0, *qPower = 0; | |
2302 double *spPtr = 0, *qpPtr = 0; | |
2303 | |
2304 if (usingPower) { | |
2305 if(!(dbH->flags & O2_FLAG_POWER)) { | |
2306 error("database not power-enabled", dbName); | |
2307 } | |
2308 sPower = new double[dbVectors]; | |
2309 spPtr = sPower; | |
2310 memcpy(sPower, powerTable, dbVectors * sizeof(double)); | |
2311 } | |
2312 | |
2313 for(i=0; i<dbH->numFiles; i++){ | |
2314 if(trackTable[i]>=sequenceLength) { | |
2315 sequence_sum(snPtr, trackTable[i], sequenceLength); | |
2316 sequence_sqrt(snPtr, trackTable[i], sequenceLength); | |
2317 if (usingPower) { | |
2318 sequence_sum(spPtr, trackTable[i], sequenceLength); | |
2319 sequence_average(spPtr, trackTable[i], sequenceLength); | |
2320 } | |
2321 } | |
2322 snPtr += trackTable[i]; | |
2323 if (usingPower) { | |
2324 spPtr += trackTable[i]; | |
2325 } | |
2326 } | |
2327 | |
2328 sequence_sum(qnPtr, numVectors, sequenceLength); | |
2329 sequence_sqrt(qnPtr, numVectors, sequenceLength); | |
2330 | |
2331 if (usingPower) { | |
2332 qPower = new double[numVectors]; | |
2333 qpPtr = qPower; | |
2334 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { | |
2335 error("error seeking to data", powerFileName, "lseek"); | |
2336 } | |
2337 int count = read(powerfd, qPower, numVectors * sizeof(double)); | |
2338 if (count == -1) { | |
2339 error("error reading data", powerFileName, "read"); | |
2340 } | |
2341 if ((unsigned) count != numVectors * sizeof(double)) { | |
2342 error("short read", powerFileName); | |
2343 } | |
2344 | |
2345 sequence_sum(qpPtr, numVectors, sequenceLength); | |
2346 sequence_average(qpPtr, numVectors, sequenceLength); | |
2347 } | |
2348 | |
2349 if(verbosity>1) { | |
2350 cerr << "done." << endl; | |
2351 } | |
2352 | |
2353 if(verbosity>1) { | |
2354 cerr << "matching tracks..." << endl; | |
2355 } | |
2356 | |
2357 assert(pointNN>0 && pointNN<=O2_MAXNN); | |
2358 assert(trackNN>0 && trackNN<=O2_MAXNN); | |
2359 | |
2360 // Make temporary dynamic memory for results | |
2361 double trackDistances[trackNN]; | |
2362 unsigned trackIDs[trackNN]; | |
2363 unsigned trackQIndexes[trackNN]; | |
2364 unsigned trackSIndexes[trackNN]; | |
2365 | |
2366 double distances[pointNN]; | |
2367 unsigned qIndexes[pointNN]; | |
2368 unsigned sIndexes[pointNN]; | |
2369 | |
2370 | |
2371 unsigned k,l,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; | |
2372 double thisDist; | |
2373 | |
2374 for(k=0; k<pointNN; k++){ | |
2375 distances[k]=0.0; | |
2376 qIndexes[k]=~0; | |
2377 sIndexes[k]=~0; | |
2378 } | |
2379 | |
2380 for(k=0; k<trackNN; k++){ | |
2381 trackDistances[k]=0.0; | |
2382 trackQIndexes[k]=~0; | |
2383 trackSIndexes[k]=~0; | |
2384 trackIDs[k]=~0; | |
2385 } | |
2386 | |
2387 // Timestamp and durations processing | |
2388 double meanQdur = 0; | |
2389 double *timesdata = 0; | |
2390 double *querydurs = 0; | |
2391 double *meanDBdur = 0; | |
2392 | |
2393 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | |
2394 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | |
2395 usingTimes=0; | |
2396 } | |
2397 | |
2398 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
2399 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | |
2400 | |
2401 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | |
2402 timesdata = new double[2*numVectors]; | |
2403 querydurs = new double[numVectors]; | |
2404 | |
2405 insertTimeStamps(numVectors, timesFile, timesdata); | |
2406 // Calculate durations of points | |
2407 for(k=0; k<numVectors-1; k++){ | |
2408 querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; | |
2409 meanQdur += querydurs[k]; | |
2410 } | |
2411 meanQdur/=k; | |
2412 if(verbosity>1) { | |
2413 cerr << "mean query file duration: " << meanQdur << endl; | |
2414 } | |
2415 meanDBdur = new double[dbH->numFiles]; | |
2416 assert(meanDBdur); | |
2417 for(k=0; k<dbH->numFiles; k++){ | |
2418 meanDBdur[k]=0.0; | |
2419 for(j=0; j<trackTable[k]-1 ; j++) { | |
2420 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j]; | |
2421 } | |
2422 meanDBdur[k]/=j; | |
2423 } | |
2424 } | |
2425 | |
2426 if(usingQueryPoint) | |
2427 if(queryPoint>numVectors || queryPoint>numVectors-wL+1) | |
2428 error("queryPoint > numVectors-wL+1 in query"); | |
2429 else{ | |
2430 if(verbosity>1) { | |
2431 cerr << "query point: " << queryPoint << endl; cerr.flush(); | |
2432 } | |
2433 query = query + queryPoint*dbH->dim; | |
2434 qnPtr = qnPtr + queryPoint; | |
2435 if (usingPower) { | |
2436 qpPtr = qpPtr + queryPoint; | |
2437 } | |
2438 numVectors=wL; | |
2439 } | |
2440 | |
2441 double ** D = 0; // Differences query and target | |
2442 double ** DD = 0; // Matched filter distance | |
2443 | |
2444 D = new double*[numVectors]; | |
2445 assert(D); | |
2446 DD = new double*[numVectors]; | |
2447 assert(DD); | |
2448 | |
2449 gettimeofday(&tv1, NULL); | |
2450 unsigned processedTracks = 0; | |
2451 unsigned successfulTracks=0; | |
2452 | |
2453 double* qp; | |
2454 double* sp; | |
2455 double* dp; | |
2456 | |
2457 // build track offset table | |
2458 off_t *trackOffsetTable = new off_t[dbH->numFiles]; | |
2459 unsigned cumTrack=0; | |
2460 off_t trackIndexOffset; | |
2461 for(k=0; k<dbH->numFiles;k++){ | |
2462 trackOffsetTable[k]=cumTrack; | |
2463 cumTrack+=trackTable[k]*dbH->dim; | |
2464 } | |
2465 | |
2466 char nextKey [MAXSTR]; | |
2467 | |
2468 // chi^2 statistics | |
2469 double sampleCount = 0; | |
2470 double sampleSum = 0; | |
2471 double logSampleSum = 0; | |
2472 double minSample = 1e9; | |
2473 double maxSample = 0; | |
2474 | |
2475 // Track loop | |
2476 size_t data_buffer_size = 0; | |
2477 double *data_buffer = 0; | |
2478 lseek(dbfid, dbH->dataOffset, SEEK_SET); | |
2479 | |
2480 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ | |
2481 | |
2482 trackOffset = trackOffsetTable[track]; // numDoubles offset | |
2483 | |
2484 // get trackID from file if using a control file | |
2485 if(trackFile) { | |
2486 trackFile->getline(nextKey,MAXSTR); | |
2487 if(!trackFile->eof()) { | |
2488 track = getKeyPos(nextKey); | |
2489 trackOffset = trackOffsetTable[track]; | |
2490 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); | |
2491 } else { | |
2492 break; | |
2493 } | |
2494 } | |
2495 | |
2496 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset | |
2497 | |
2498 if(sequenceLength<=trackTable[track]){ // test for short sequences | |
2499 | |
2500 if(verbosity>7) { | |
2501 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); | |
2502 } | |
2503 | |
2504 // Sum products matrix | |
2505 for(j=0; j<numVectors;j++){ | |
2506 D[j]=new double[trackTable[track]]; | |
2507 assert(D[j]); | |
2508 | |
2509 } | |
2510 | |
2511 // Matched filter matrix | |
2512 for(j=0; j<numVectors;j++){ | |
2513 DD[j]=new double[trackTable[track]]; | |
2514 assert(DD[j]); | |
2515 } | |
2516 | |
2517 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) { | |
2518 if(data_buffer) { | |
2519 free(data_buffer); | |
2520 } | |
2521 { | |
2522 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; | |
2523 void *tmp = malloc(data_buffer_size); | |
2524 if (tmp == NULL) { | |
2525 error("error allocating data buffer"); | |
2526 } | |
2527 data_buffer = (double *) tmp; | |
2528 } | |
2529 } | |
2530 | |
2531 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); | |
2532 | |
2533 // Dot product | |
2534 for(j=0; j<numVectors; j++) | |
2535 for(k=0; k<trackTable[track]; k++){ | |
2536 qp=query+j*dbH->dim; | |
2537 sp=data_buffer+k*dbH->dim; | |
2538 DD[j][k]=0.0; // Initialize matched filter array | |
2539 dp=&D[j][k]; // point to correlation cell j,k | |
2540 *dp=0.0; // initialize correlation cell | |
2541 l=dbH->dim; // size of vectors | |
2542 while(l--) | |
2543 *dp+=*qp++**sp++; | |
2544 } | |
2545 | |
2546 // Matched Filter | |
2547 // HOP SIZE == 1 | |
2548 double* spd; | |
2549 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop | |
2550 for(w=0; w<wL; w++) | |
2551 for(j=0; j<numVectors-w; j++){ | |
2552 sp=DD[j]; | |
2553 spd=D[j+w]+w; | |
2554 k=trackTable[track]-w; | |
2555 while(k--) | |
2556 *sp+++=*spd++; | |
2557 } | |
2558 } | |
2559 | |
2560 else{ // HOP_SIZE != 1 | |
2561 for(w=0; w<wL; w++) | |
2562 for(j=0; j<numVectors-w; j+=HOP_SIZE){ | |
2563 sp=DD[j]; | |
2564 spd=D[j+w]+w; | |
2565 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){ | |
2566 *sp+=*spd; | |
2567 sp+=HOP_SIZE; | |
2568 spd+=HOP_SIZE; | |
2569 } | |
2570 } | |
2571 } | |
2572 | |
2573 if(verbosity>3 && usingTimes) { | |
2574 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl; | |
2575 cerr.flush(); | |
2576 } | |
2577 | |
2578 if(!usingTimes || | |
2579 (usingTimes | |
2580 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){ | |
2581 | |
2582 if(verbosity>3 && usingTimes) { | |
2583 cerr << "within duration tolerance." << endl; | |
2584 cerr.flush(); | |
2585 } | |
2586 | |
2587 // Search for minimum distance by shingles (concatenated vectors) | |
2588 for(j=0;j<=numVectors-wL;j+=HOP_SIZE) | |
2589 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ | |
2590 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; | |
2591 if(verbosity>9) { | |
2592 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl; | |
2593 } | |
2594 // Gather chi^2 statistics | |
2595 if(thisDist<minSample) | |
2596 minSample=thisDist; | |
2597 else if(thisDist>maxSample) | |
2598 maxSample=thisDist; | |
2599 if(thisDist>1e-9){ | |
2600 sampleCount++; | |
2601 sampleSum+=thisDist; | |
2602 logSampleSum+=log(thisDist); | |
2603 } | |
2604 | |
2605 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); | |
2606 // Power test | |
2607 if (usingPower) { | |
2608 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) { | |
2609 thisDist = 1000000.0; | |
2610 } | |
2611 } | |
2612 | |
2613 if(thisDist>=0 && thisDist<=radius){ | |
2614 distances[0]++; // increment count | |
2615 break; // only need one track point per query point | |
2616 } | |
2617 } | |
2618 // How many points were below threshold ? | |
2619 thisDist=distances[0]; | |
2620 | |
2621 // Let's see the distances then... | |
2622 if(verbosity>3) { | |
2623 cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl; | |
2624 } | |
2625 | |
2626 // All the track stuff goes here | |
2627 n=trackNN; | |
2628 while(n--){ | |
2629 if(thisDist>trackDistances[n]){ | |
2630 if((n==0 || thisDist<=trackDistances[n-1])){ | |
2631 // Copy all values above up the queue | |
2632 for( l=trackNN-1 ; l > n ; l--){ | |
2633 trackDistances[l]=trackDistances[l-1]; | |
2634 trackQIndexes[l]=trackQIndexes[l-1]; | |
2635 trackSIndexes[l]=trackSIndexes[l-1]; | |
2636 trackIDs[l]=trackIDs[l-1]; | |
2637 } | |
2638 trackDistances[n]=thisDist; | |
2639 trackQIndexes[n]=qIndexes[0]; | |
2640 trackSIndexes[n]=sIndexes[0]; | |
2641 successfulTracks++; | |
2642 trackIDs[n]=track; | |
2643 break; | |
2644 } | |
2645 } | |
2646 else | |
2647 break; | |
2648 } | |
2649 } // Duration match | |
2650 | |
2651 // Clean up current track | |
2652 if(D!=NULL){ | |
2653 for(j=0; j<numVectors; j++) | |
2654 delete[] D[j]; | |
2655 } | |
2656 | |
2657 if(DD!=NULL){ | |
2658 for(j=0; j<numVectors; j++) | |
2659 delete[] DD[j]; | |
2660 } | |
2661 } | |
2662 // per-track reset array values | |
2663 for(unsigned k=0; k<pointNN; k++){ | |
2664 distances[k]=0.0; | |
2665 qIndexes[k]=~0; | |
2666 sIndexes[k]=~0; | |
2667 } | |
2668 } | |
2669 | |
2670 free(data_buffer); | |
2671 | |
2672 gettimeofday(&tv2,NULL); | |
2673 if(verbosity>1) { | |
2674 cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" | |
2675 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | |
2676 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum | |
2677 << " minSample: " << minSample << " maxSample: " << maxSample << endl; | |
2678 } | |
2679 | |
2680 if(adbQueryResponse==0){ | |
2681 if(verbosity>1) { | |
2682 cerr<<endl; | |
2683 } | |
2684 // Output answer | |
2685 // Loop over nearest neighbours | |
2686 for(k=0; k < min(trackNN,successfulTracks); k++) | |
2687 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << endl; | |
2688 } | |
2689 else{ // Process Web Services Query | |
2690 int listLen = min(trackNN, processedTracks); | |
2691 adbQueryResponse->result.__sizeRlist=listLen; | |
2692 adbQueryResponse->result.__sizeDist=listLen; | |
2693 adbQueryResponse->result.__sizeQpos=listLen; | |
2694 adbQueryResponse->result.__sizeSpos=listLen; | |
2695 adbQueryResponse->result.Rlist= new char*[listLen]; | |
2696 adbQueryResponse->result.Dist = new double[listLen]; | |
2697 adbQueryResponse->result.Qpos = new unsigned int[listLen]; | |
2698 adbQueryResponse->result.Spos = new unsigned int[listLen]; | |
2699 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ | |
2700 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; | |
2701 adbQueryResponse->result.Dist[k]=trackDistances[k]; | |
2702 adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; | |
2703 adbQueryResponse->result.Spos[k]=trackSIndexes[k]; | |
2704 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); | |
2705 } | |
2706 } | |
2707 | |
2708 // Clean up | |
2709 if(trackOffsetTable) | |
2710 delete[] trackOffsetTable; | |
2711 if(queryCopy) | |
2712 delete[] queryCopy; | |
2713 if(qNorm) | |
2714 delete[] qNorm; | |
2715 if(sNorm) | |
2716 delete[] sNorm; | |
2717 if(qPower) | |
2718 delete[] qPower; | |
2719 if(sPower) | |
2720 delete[] sPower; | |
2721 if(D) | |
2722 delete[] D; | |
2723 if(DD) | |
2724 delete[] DD; | |
2725 if(timesdata) | |
2726 delete[] timesdata; | |
2727 if(querydurs) | |
2728 delete[] querydurs; | |
2729 if(meanDBdur) | |
2730 delete[] meanDBdur; | |
2731 } | |
2732 | |
2733 // Unit norm block of features | 417 // Unit norm block of features |
2734 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){ | 418 |
2735 unsigned d; | 419 /* FIXME: in fact this does not unit norm a block of features, it just |
2736 double L2, *p; | 420 records the L2 norms somewhere. unitNorm() does in fact unit norm |
2737 if(verbosity>2) { | 421 a block of features. */ |
2738 cerr << "norming " << n << " vectors...";cerr.flush(); | |
2739 } | |
2740 while(n--){ | |
2741 p=X; | |
2742 L2=0.0; | |
2743 d=dim; | |
2744 while(d--){ | |
2745 L2+=*p**p; | |
2746 p++; | |
2747 } | |
2748 /* L2=sqrt(L2);*/ | |
2749 if(qNorm) | |
2750 *qNorm++=L2; | |
2751 /* | |
2752 oneOverL2 = 1.0/L2; | |
2753 d=dim; | |
2754 while(d--){ | |
2755 *X*=oneOverL2; | |
2756 X++; | |
2757 */ | |
2758 X+=dim; | |
2759 } | |
2760 if(verbosity>2) { | |
2761 cerr << "done..." << endl; | |
2762 } | |
2763 } | |
2764 | |
2765 // Unit norm block of features | |
2766 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){ | 422 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){ |
2767 unsigned d; | 423 unsigned d; |
2768 double *p; | 424 double *p; |
2769 unsigned nn = n; | 425 unsigned nn = n; |
2770 | 426 |
2772 | 428 |
2773 if( !append && (dbH->flags & O2_FLAG_L2NORM) ) | 429 if( !append && (dbH->flags & O2_FLAG_L2NORM) ) |
2774 error("Database is already L2 normed", "automatic norm on insert is enabled"); | 430 error("Database is already L2 normed", "automatic norm on insert is enabled"); |
2775 | 431 |
2776 if(verbosity>2) { | 432 if(verbosity>2) { |
2777 cerr << "norming " << n << " vectors...";cerr.flush(); | 433 std::cerr << "norming " << n << " vectors...";std::cerr.flush(); |
2778 } | 434 } |
2779 | 435 |
2780 double* l2buf = new double[n]; | 436 double* l2buf = new double[n]; |
2781 double* l2ptr = l2buf; | 437 double* l2ptr = l2buf; |
2782 assert(l2buf); | 438 assert(l2buf); |
2805 } | 461 } |
2806 memcpy(l2normTable+offset, l2buf, n*sizeof(double)); | 462 memcpy(l2normTable+offset, l2buf, n*sizeof(double)); |
2807 if(l2buf) | 463 if(l2buf) |
2808 delete[] l2buf; | 464 delete[] l2buf; |
2809 if(verbosity>2) { | 465 if(verbosity>2) { |
2810 cerr << "done..." << endl; | 466 std::cerr << "done..." << std::endl; |
2811 } | |
2812 } | |
2813 | |
2814 | |
2815 // Start an audioDB server on the host | |
2816 void audioDB::startServer(){ | |
2817 struct soap soap; | |
2818 int m, s; // master and slave sockets | |
2819 soap_init(&soap); | |
2820 // FIXME: largely this use of SO_REUSEADDR is to make writing (and | |
2821 // running) test cases more convenient, so that multiple test runs | |
2822 // in close succession don't fail because of a bin() error. | |
2823 // Investigate whether there are any potential drawbacks in this, | |
2824 // and also whether there's a better way to write the tests. -- | |
2825 // CSR, 2007-10-03 | |
2826 soap.bind_flags |= SO_REUSEADDR; | |
2827 m = soap_bind(&soap, NULL, port, 100); | |
2828 if (m < 0) | |
2829 soap_print_fault(&soap, stderr); | |
2830 else | |
2831 { | |
2832 fprintf(stderr, "Socket connection successful: master socket = %d\n", m); | |
2833 for (int i = 1; ; i++) | |
2834 { | |
2835 s = soap_accept(&soap); | |
2836 if (s < 0) | |
2837 { | |
2838 soap_print_fault(&soap, stderr); | |
2839 break; | |
2840 } | |
2841 fprintf(stderr, "%d: accepted connection from IP=%lu.%lu.%lu.%lu socket=%d\n", i, | |
2842 (soap.ip >> 24)&0xFF, (soap.ip >> 16)&0xFF, (soap.ip >> 8)&0xFF, soap.ip&0xFF, s); | |
2843 if (soap_serve(&soap) != SOAP_OK) // process RPC request | |
2844 soap_print_fault(&soap, stderr); // print error | |
2845 fprintf(stderr, "request served\n"); | |
2846 soap_destroy(&soap); // clean up class instances | |
2847 soap_end(&soap); // clean up everything and close socket | |
2848 } | |
2849 } | |
2850 soap_done(&soap); // close master socket and detach environment | |
2851 } | |
2852 | |
2853 | |
2854 // web services | |
2855 | |
2856 // SERVER SIDE | |
2857 int adb__status(struct soap* soap, xsd__string dbName, adb__statusResponse &adbStatusResponse){ | |
2858 char* const argv[]={"audioDB",COM_STATUS,"-d",dbName}; | |
2859 const unsigned argc = 4; | |
2860 try { | |
2861 audioDB(argc, argv, &adbStatusResponse); | |
2862 return SOAP_OK; | |
2863 } catch(char *err) { | |
2864 soap_receiver_fault(soap, err, ""); | |
2865 return SOAP_FAULT; | |
2866 } | |
2867 } | |
2868 | |
2869 // Literal translation of command line to web service | |
2870 | |
2871 int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int trackNN, xsd__int seqLen, adb__queryResponse &adbQueryResponse){ | |
2872 char queryType[256]; | |
2873 for(int k=0; k<256; k++) | |
2874 queryType[k]='\0'; | |
2875 if(qType == O2_POINT_QUERY) | |
2876 strncpy(queryType, "point", strlen("point")); | |
2877 else if (qType == O2_SEQUENCE_QUERY) | |
2878 strncpy(queryType, "sequence", strlen("sequence")); | |
2879 else if(qType == O2_TRACK_QUERY) | |
2880 strncpy(queryType,"track", strlen("track")); | |
2881 else | |
2882 strncpy(queryType, "", strlen("")); | |
2883 | |
2884 if(pointNN==0) | |
2885 pointNN=10; | |
2886 if(trackNN==0) | |
2887 trackNN=10; | |
2888 if(seqLen==0) | |
2889 seqLen=16; | |
2890 | |
2891 char qPosStr[256]; | |
2892 sprintf(qPosStr, "%d", qPos); | |
2893 char pointNNStr[256]; | |
2894 sprintf(pointNNStr,"%d",pointNN); | |
2895 char trackNNStr[256]; | |
2896 sprintf(trackNNStr,"%d",trackNN); | |
2897 char seqLenStr[256]; | |
2898 sprintf(seqLenStr,"%d",seqLen); | |
2899 | |
2900 const char* argv[] ={ | |
2901 "./audioDB", | |
2902 COM_QUERY, | |
2903 queryType, // Need to pass a parameter | |
2904 COM_DATABASE, | |
2905 ENSURE_STRING(dbName), | |
2906 COM_FEATURES, | |
2907 ENSURE_STRING(qKey), | |
2908 COM_KEYLIST, | |
2909 ENSURE_STRING(keyList), | |
2910 COM_TIMES, | |
2911 ENSURE_STRING(timesFileName), | |
2912 COM_QPOINT, | |
2913 qPosStr, | |
2914 COM_POINTNN, | |
2915 pointNNStr, | |
2916 COM_TRACKNN, | |
2917 trackNNStr, // Need to pass a parameter | |
2918 COM_SEQLEN, | |
2919 seqLenStr | |
2920 }; | |
2921 | |
2922 const unsigned argc = 19; | |
2923 try { | |
2924 audioDB(argc, (char* const*)argv, &adbQueryResponse); | |
2925 return SOAP_OK; | |
2926 } catch (char *err) { | |
2927 soap_receiver_fault(soap, err, ""); | |
2928 return SOAP_FAULT; | |
2929 } | |
2930 } | |
2931 | |
2932 int adb__sequenceQuery(struct soap* soap, xsd__string dbName, xsd__string qKey, | |
2933 adb__sequenceQueryParms *parms, | |
2934 adb__queryResponse &adbQueryResponse) { | |
2935 | |
2936 char qPosStr[256]; | |
2937 char pointNNStr[256]; | |
2938 char trackNNStr[256]; | |
2939 char seqLenStr[256]; | |
2940 char relative_thresholdStr[256]; | |
2941 char absolute_thresholdStr[256]; | |
2942 | |
2943 /* When the branch is merged, move this to a header and use it | |
2944 elsewhere */ | |
2945 #define INTSTRINGIFY(val, str) \ | |
2946 snprintf(str, 256, "%d", val); | |
2947 #define DOUBLESTRINGIFY(val, str) \ | |
2948 snprintf(str, 256, "%f", val); | |
2949 | |
2950 INTSTRINGIFY(parms->qPos, qPosStr); | |
2951 INTSTRINGIFY(parms->pointNN, pointNNStr); | |
2952 INTSTRINGIFY(parms->segNN, trackNNStr); | |
2953 /* FIXME: decide which of segLen and seqLen should live */ | |
2954 INTSTRINGIFY(parms->segLen, seqLenStr); | |
2955 | |
2956 DOUBLESTRINGIFY(parms->relative_threshold, relative_thresholdStr); | |
2957 DOUBLESTRINGIFY(parms->absolute_threshold, absolute_thresholdStr); | |
2958 | |
2959 const char *argv[] = { | |
2960 "./audioDB", | |
2961 COM_QUERY, | |
2962 "sequence", | |
2963 COM_DATABASE, | |
2964 dbName, | |
2965 COM_FEATURES, | |
2966 qKey, | |
2967 COM_KEYLIST, | |
2968 /* FIXME: when this branch is merged, use ENSURE_STRING */ | |
2969 parms->keyList==0?"":parms->keyList, | |
2970 COM_TIMES, | |
2971 parms->timesFileName==0?"":parms->timesFileName, | |
2972 COM_QUERYPOWER, | |
2973 parms->powerFileName==0?"":parms->powerFileName, | |
2974 COM_QPOINT, | |
2975 qPosStr, | |
2976 COM_POINTNN, | |
2977 pointNNStr, | |
2978 COM_TRACKNN, | |
2979 trackNNStr, | |
2980 COM_SEQLEN, | |
2981 seqLenStr, | |
2982 COM_RELATIVE_THRESH, | |
2983 relative_thresholdStr, | |
2984 COM_ABSOLUTE_THRESH, | |
2985 absolute_thresholdStr | |
2986 }; | |
2987 | |
2988 const unsigned argc = 25; | |
2989 | |
2990 try { | |
2991 audioDB(argc, (char* const*)argv, &adbQueryResponse); | |
2992 return SOAP_OK; | |
2993 } catch (char *err) { | |
2994 soap_receiver_fault(soap, err, ""); | |
2995 return SOAP_FAULT; | |
2996 } | 467 } |
2997 } | 468 } |
2998 | 469 |
2999 int main(const unsigned argc, char* const argv[]){ | 470 int main(const unsigned argc, char* const argv[]){ |
3000 audioDB(argc, argv); | 471 audioDB(argc, argv); |