Mercurial > hg > audiodb
comparison audioDB.cpp @ 239:2cc06e5b05a5
Merge refactoring branch.
Bug fixes:
* 64-bit powertable bug;
* -inf - -inf bug;
* use new times information;
* plus short track, O2_MAXFILES and structure padding ABI fixes (already backported).
Major code changes:
* split source into functional units, known as 'files';
* Reporter class for accumulating and reporting on query results;
* much OAOOization ("once and only once" de-duplication), mostly from above: net 800 LOC (25%) shorter.
author | mas01cr |
---|---|
date | Thu, 13 Dec 2007 14:23:32 +0000 |
parents | 3a81da6fb1d7 |
children | 5682c7d7444b |
comparison
equal
deleted
inserted
replaced
224:3a81da6fb1d7 | 239:2cc06e5b05a5 |
---|---|
1 #include "audioDB.h" | 1 #include "audioDB.h" |
2 | |
3 #if defined(O2_DEBUG) | |
4 void sigterm_action(int signal, siginfo_t *info, void *context) { | |
5 exit(128+signal); | |
6 } | |
7 | |
8 void sighup_action(int signal, siginfo_t *info, void *context) { | |
9 // FIXME: reread any configuration files | |
10 } | |
11 #endif | |
12 | |
13 void audioDB::error(const char* a, const char* b, const char *sysFunc) { | |
14 if(isServer) { | |
15 /* FIXME: I think this is leaky -- we never delete err. actually | |
16 deleting it is tricky, though; it gets placed into some | |
17 soap-internal struct with uncertain extent... -- CSR, | |
18 2007-10-01 */ | |
19 char *err = new char[256]; /* FIXME: overflows */ | |
20 snprintf(err, 255, "%s: %s\n%s", a, b, sysFunc ? strerror(errno) : ""); | |
21 /* FIXME: actually we could usefully do with a properly structured | |
22 type, so that we can throw separate faultstring and details. | |
23 -- CSR, 2007-10-01 */ | |
24 throw(err); | |
25 } else { | |
26 cerr << a << ": " << b << endl; | |
27 if (sysFunc) { | |
28 perror(sysFunc); | |
29 } | |
30 exit(1); | |
31 } | |
32 } | |
33 | 2 |
34 audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS | 3 audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS |
35 { | 4 { |
36 if(processArgs(argc, argv)<0){ | 5 if(processArgs(argc, argv)<0){ |
37 printf("No command found.\n"); | 6 printf("No command found.\n"); |
157 cmdline_parser_print_help(); | 126 cmdline_parser_print_help(); |
158 exit(0); | 127 exit(0); |
159 } | 128 } |
160 | 129 |
161 if(args_info.verbosity_given){ | 130 if(args_info.verbosity_given){ |
162 verbosity=args_info.verbosity_arg; | 131 verbosity = args_info.verbosity_arg; |
163 if(verbosity<0 || verbosity>10){ | 132 if(verbosity < 0 || verbosity > 10){ |
164 cerr << "Warning: verbosity out of range, setting to 1" << endl; | 133 std::cerr << "Warning: verbosity out of range, setting to 1" << std::endl; |
165 verbosity=1; | 134 verbosity = 1; |
166 } | 135 } |
167 } | 136 } |
168 | 137 |
169 if(args_info.size_given) { | 138 if(args_info.size_given) { |
170 if (args_info.size_arg < 50 || args_info.size_arg > 32000) { | 139 if (args_info.size_arg < 50 || args_info.size_arg > 32000) { |
171 error("Size out of range", ""); | 140 error("Size out of range", ""); |
172 } | 141 } |
173 size = (off_t) args_info.size_arg * 1000000; | 142 size = (off_t) args_info.size_arg * 1000000; |
174 } | 143 } |
175 | 144 |
176 if(args_info.radius_given){ | 145 if(args_info.radius_given) { |
177 radius=args_info.radius_arg; | 146 radius = args_info.radius_arg; |
178 if(radius<=0 || radius>1000000000){ | 147 if(radius <= 0 || radius > 1000000000) { |
179 error("radius out of range"); | 148 error("radius out of range"); |
180 } | 149 } else { |
181 else | 150 VERB_LOG(3, "Setting radius to %f\n", radius); |
182 if(verbosity>3) { | 151 } |
183 cerr << "Setting radius to " << radius << endl; | |
184 } | |
185 } | 152 } |
186 | 153 |
187 if(args_info.SERVER_given){ | 154 if(args_info.SERVER_given){ |
188 command=COM_SERVER; | 155 command=COM_SERVER; |
189 port=args_info.SERVER_arg; | 156 port=args_info.SERVER_arg; |
247 if(args_info.key_given) | 214 if(args_info.key_given) |
248 key=args_info.key_arg; | 215 key=args_info.key_arg; |
249 if(args_info.times_given){ | 216 if(args_info.times_given){ |
250 timesFileName=args_info.times_arg; | 217 timesFileName=args_info.times_arg; |
251 if(strlen(timesFileName)>0){ | 218 if(strlen(timesFileName)>0){ |
252 if(!(timesFile = new ifstream(timesFileName,ios::in))) | 219 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) |
253 error("Could not open times file for reading", timesFileName); | 220 error("Could not open times file for reading", timesFileName); |
254 usingTimes=1; | 221 usingTimes=1; |
255 } | 222 } |
256 } | 223 } |
257 if (args_info.power_given) { | 224 if (args_info.power_given) { |
274 key=args_info.keyList_arg; // INCONSISTENT NO CHECK | 241 key=args_info.keyList_arg; // INCONSISTENT NO CHECK |
275 | 242 |
276 /* TO DO: REPLACE WITH | 243 /* TO DO: REPLACE WITH |
277 if(args_info.keyList_given){ | 244 if(args_info.keyList_given){ |
278 trackFileName=args_info.keyList_arg; | 245 trackFileName=args_info.keyList_arg; |
279 if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) | 246 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in))) |
280 error("Could not open keyList file for reading",trackFileName); | 247 error("Could not open keyList file for reading",trackFileName); |
281 } | 248 } |
282 AND UPDATE BATCHINSERT() | 249 AND UPDATE BATCHINSERT() |
283 */ | 250 */ |
284 | 251 |
285 if(args_info.timesList_given){ | 252 if(args_info.timesList_given){ |
286 timesFileName=args_info.timesList_arg; | 253 timesFileName=args_info.timesList_arg; |
287 if(strlen(timesFileName)>0){ | 254 if(strlen(timesFileName)>0){ |
288 if(!(timesFile = new ifstream(timesFileName,ios::in))) | 255 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) |
289 error("Could not open timesList file for reading", timesFileName); | 256 error("Could not open timesList file for reading", timesFileName); |
290 usingTimes=1; | 257 usingTimes=1; |
291 } | 258 } |
292 } | 259 } |
293 if(args_info.powerList_given){ | 260 if(args_info.powerList_given){ |
294 powerFileName=args_info.powerList_arg; | 261 powerFileName=args_info.powerList_arg; |
295 if(strlen(powerFileName)>0){ | 262 if(strlen(powerFileName)>0){ |
296 if(!(powerFile = new ifstream(powerFileName,ios::in))) | 263 if(!(powerFile = new std::ifstream(powerFileName,std::ios::in))) |
297 error("Could not open powerList file for reading", powerFileName); | 264 error("Could not open powerList file for reading", powerFileName); |
298 usingPower=1; | 265 usingPower=1; |
299 } | 266 } |
300 } | 267 } |
301 return 0; | 268 return 0; |
307 dbName=args_info.database_arg; | 274 dbName=args_info.database_arg; |
308 inFile=args_info.features_arg; | 275 inFile=args_info.features_arg; |
309 | 276 |
310 if(args_info.keyList_given){ | 277 if(args_info.keyList_given){ |
311 trackFileName=args_info.keyList_arg; | 278 trackFileName=args_info.keyList_arg; |
312 if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) | 279 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in))) |
313 error("Could not open keyList file for reading",trackFileName); | 280 error("Could not open keyList file for reading",trackFileName); |
314 } | 281 } |
315 | 282 |
316 if(args_info.times_given){ | 283 if(args_info.times_given){ |
317 timesFileName=args_info.times_arg; | 284 timesFileName=args_info.times_arg; |
318 if(strlen(timesFileName)>0){ | 285 if(strlen(timesFileName)>0){ |
319 if(!(timesFile = new ifstream(timesFileName,ios::in))) | 286 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) |
320 error("Could not open times file for reading", timesFileName); | 287 error("Could not open times file for reading", timesFileName); |
321 usingTimes=1; | 288 usingTimes=1; |
322 } | 289 } |
323 } | 290 } |
324 | 291 |
380 return 0; | 347 return 0; |
381 } | 348 } |
382 return -1; // no command found | 349 return -1; // no command found |
383 } | 350 } |
384 | 351 |
385 void audioDB::get_lock(int fd, bool exclusive) { | |
386 struct flock lock; | |
387 int status; | |
388 | |
389 lock.l_type = exclusive ? F_WRLCK : F_RDLCK; | |
390 lock.l_whence = SEEK_SET; | |
391 lock.l_start = 0; | |
392 lock.l_len = 0; /* "the whole file" */ | |
393 | |
394 retry: | |
395 do { | |
396 status = fcntl(fd, F_SETLKW, &lock); | |
397 } while (status != 0 && errno == EINTR); | |
398 | |
399 if (status) { | |
400 if (errno == EAGAIN) { | |
401 sleep(1); | |
402 goto retry; | |
403 } else { | |
404 error("fcntl lock error", "", "fcntl"); | |
405 } | |
406 } | |
407 } | |
408 | |
409 void audioDB::release_lock(int fd) { | |
410 struct flock lock; | |
411 int status; | |
412 | |
413 lock.l_type = F_UNLCK; | |
414 lock.l_whence = SEEK_SET; | |
415 lock.l_start = 0; | |
416 lock.l_len = 0; | |
417 | |
418 status = fcntl(fd, F_SETLKW, &lock); | |
419 | |
420 if (status) | |
421 error("fcntl unlock error", "", "fcntl"); | |
422 } | |
423 | |
424 /* Make a new database. | |
425 | |
426 The database consists of: | |
427 | |
428 * a header (see dbTableHeader struct definition); | |
429 * keyTable: list of keys of tracks; | |
430 * trackTable: Maps implicit feature index to a feature vector | |
431 matrix (sizes of tracks) | |
432 * featureTable: Lots of doubles; | |
433 * timesTable: (start,end) time points for each feature vector; | |
434 * powerTable: associated power for each feature vector; | |
435 * l2normTable: squared l2norms for each feature vector. | |
436 */ | |
437 void audioDB::create(const char* dbName){ | |
438 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) | |
439 error("Can't create database file", dbName, "open"); | |
440 get_lock(dbfid, 1); | |
441 | |
442 if(verbosity) { | |
443 cerr << "header size:" << O2_HEADERSIZE << endl; | |
444 } | |
445 | |
446 dbH = new dbTableHeaderT(); | |
447 assert(dbH); | |
448 | |
449 unsigned int maxfiles = (unsigned int) rint((double) O2_MAXFILES * (double) size / (double) O2_DEFAULTDBSIZE); | |
450 | |
451 // Initialize header | |
452 dbH->magic = O2_MAGIC; | |
453 dbH->version = O2_FORMAT_VERSION; | |
454 dbH->numFiles = 0; | |
455 dbH->dim = 0; | |
456 dbH->flags = 0; | |
457 dbH->headerSize = O2_HEADERSIZE; | |
458 dbH->length = 0; | |
459 dbH->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE); | |
460 dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles); | |
461 dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles); | |
462 dbH->l2normTableOffset = ALIGN_PAGE_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double)); | |
463 dbH->powerTableOffset = ALIGN_PAGE_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double)); | |
464 dbH->timesTableOffset = ALIGN_PAGE_DOWN(dbH->powerTableOffset - 2*maxfiles*O2_MEANNUMVECTORS*sizeof(double)); | |
465 dbH->dbSize = size; | |
466 | |
467 write(dbfid, dbH, O2_HEADERSIZE); | |
468 | |
469 // go to the location corresponding to the last byte | |
470 if (lseek (dbfid, size - 1, SEEK_SET) == -1) | |
471 error("lseek error in db file", "", "lseek"); | |
472 | |
473 // write a dummy byte at the last location | |
474 if (write (dbfid, "", 1) != 1) | |
475 error("write error", "", "write"); | |
476 | |
477 if(verbosity) { | |
478 cerr << COM_CREATE << " " << dbName << endl; | |
479 } | |
480 } | |
481 | |
482 void audioDB::drop(){ | |
483 // FIXME: drop something? Should we even allow this? | |
484 } | |
485 | |
486 void audioDB::initDBHeader(const char* dbName) { | |
487 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) { | |
488 error("Can't open database file", dbName, "open"); | |
489 } | |
490 | |
491 get_lock(dbfid, forWrite); | |
492 // Get the database header info | |
493 dbH = new dbTableHeaderT(); | |
494 assert(dbH); | |
495 | |
496 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) { | |
497 error("error reading db header", dbName, "read"); | |
498 } | |
499 | |
500 if(dbH->magic == O2_OLD_MAGIC) { | |
501 // FIXME: if anyone ever complains, write the program to convert | |
502 // from the old audioDB format to the new... | |
503 error("database file has old O2 header", dbName); | |
504 } | |
505 | |
506 if(dbH->magic != O2_MAGIC) { | |
507 cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << endl; | |
508 error("database file has incorrect header", dbName); | |
509 } | |
510 | |
511 if(dbH->version != O2_FORMAT_VERSION) { | |
512 error("database file has incorrect version", dbName); | |
513 } | |
514 | |
515 if(dbH->headerSize != O2_HEADERSIZE) { | |
516 error("sizeof(dbTableHeader) unexpected: platform ABI mismatch?", dbName); | |
517 } | |
518 | |
519 #define CHECKED_MMAP(type, var, start, length) \ | |
520 { void *tmp = mmap(0, length, (PROT_READ | (forWrite ? PROT_WRITE : 0)), MAP_SHARED, dbfid, (start)); \ | |
521 if(tmp == (void *) -1) { \ | |
522 error("mmap error for db table", #var, "mmap"); \ | |
523 } \ | |
524 var = (type) tmp; \ | |
525 } | |
526 | |
527 CHECKED_MMAP(char *, db, 0, getpagesize()); | |
528 | |
529 // Make some handy tables with correct types | |
530 if(forWrite || (dbH->length > 0)) { | |
531 if(forWrite) { | |
532 fileTableLength = dbH->trackTableOffset - dbH->fileTableOffset; | |
533 trackTableLength = dbH->dataOffset - dbH->trackTableOffset; | |
534 dataBufLength = dbH->timesTableOffset - dbH->dataOffset; | |
535 timesTableLength = dbH->powerTableOffset - dbH->timesTableOffset; | |
536 powerTableLength = dbH->l2normTableOffset - dbH->powerTableOffset; | |
537 l2normTableLength = dbH->dbSize - dbH->l2normTableOffset; | |
538 } else { | |
539 fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLESIZE); | |
540 trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLESIZE); | |
541 dataBufLength = ALIGN_PAGE_UP(dbH->length); | |
542 timesTableLength = ALIGN_PAGE_UP(2*(dbH->length / dbH->dim)); | |
543 powerTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim); | |
544 l2normTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim); | |
545 } | |
546 CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, fileTableLength); | |
547 CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, trackTableLength); | |
548 /* | |
549 * No more mmap() for dataBuf | |
550 * | |
551 * FIXME: Actually we do do the mmap() in the two cases where it's | |
552 * still "needed": in pointQuery and in l2norm if dbH->length is | |
553 * non-zero. Removing those cases too (and deleting the dataBuf | |
554 * variable completely) would be cool. -- CSR, 2007-11-19 | |
555 * | |
556 * CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); | |
557 */ | |
558 CHECKED_MMAP(double *, timesTable, dbH->timesTableOffset, timesTableLength); | |
559 CHECKED_MMAP(double *, powerTable, dbH->powerTableOffset, powerTableLength); | |
560 CHECKED_MMAP(double *, l2normTable, dbH->l2normTableOffset, l2normTableLength); | |
561 } | |
562 } | |
563 | |
564 void audioDB::initInputFile (const char *inFile) { | |
565 if (inFile) { | |
566 if ((infid = open(inFile, O_RDONLY)) < 0) { | |
567 error("can't open input file for reading", inFile, "open"); | |
568 } | |
569 | |
570 if (fstat(infid, &statbuf) < 0) { | |
571 error("fstat error finding size of input", inFile, "fstat"); | |
572 } | |
573 | |
574 if(dbH->dim == 0 && dbH->length == 0) { // empty database | |
575 // initialize with input dimensionality | |
576 if(read(infid, &dbH->dim, sizeof(unsigned)) != sizeof(unsigned)) { | |
577 error("short read of input file", inFile); | |
578 } | |
579 if(dbH->dim == 0) { | |
580 error("dimensionality of zero in input file", inFile); | |
581 } | |
582 } else { | |
583 unsigned test; | |
584 if(read(infid, &test, sizeof(unsigned)) != sizeof(unsigned)) { | |
585 error("short read of input file", inFile); | |
586 } | |
587 if(dbH->dim == 0) { | |
588 error("dimensionality of zero in input file", inFile); | |
589 } | |
590 if(dbH->dim != test) { | |
591 cerr << "error: expected dimension: " << dbH->dim << ", got : " << test <<endl; | |
592 error("feature dimensions do not match database table dimensions", inFile); | |
593 } | |
594 } | |
595 | |
596 if ((indata = (char *) mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) == (caddr_t) -1) { | |
597 error("mmap error for input", inFile, "mmap"); | |
598 } | |
599 } | |
600 } | |
601 | |
602 void audioDB::initTables(const char* dbName, const char* inFile = 0) { | |
603 initDBHeader(dbName); | |
604 initInputFile(inFile); | |
605 } | |
606 | |
607 bool audioDB::enough_data_space_free(off_t size) { | |
608 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size); | |
609 } | |
610 | |
611 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) { | |
612 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET); | |
613 write(dbfid, buffer, size); | |
614 } | |
615 | |
616 void audioDB::insert(const char* dbName, const char* inFile) { | |
617 forWrite = true; | |
618 initTables(dbName, inFile); | |
619 | |
620 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
621 error("Must use timestamps with timestamped database","use --times"); | |
622 | |
623 if(!usingPower && (dbH->flags & O2_FLAG_POWER)) | |
624 error("Must use power with power-enabled database", dbName); | |
625 | |
626 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) { | |
627 error("Insert failed: no more room in database", inFile); | |
628 } | |
629 | |
630 if(!key) | |
631 key=inFile; | |
632 // Linear scan of filenames check for pre-existing feature | |
633 unsigned alreadyInserted=0; | |
634 for(unsigned k=0; k<dbH->numFiles; k++) | |
635 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key)+1)==0){ | |
636 alreadyInserted=1; | |
637 break; | |
638 } | |
639 | |
640 if(alreadyInserted){ | |
641 if(verbosity) { | |
642 cerr << "Warning: key already exists in database, ignoring: " <<inFile << endl; | |
643 } | |
644 return; | |
645 } | |
646 | |
647 // Make a track index table of features to file indexes | |
648 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
649 if(!numVectors){ | |
650 if(verbosity) { | |
651 cerr << "Warning: ignoring zero-length feature vector file:" << key << endl; | |
652 } | |
653 // CLEAN UP | |
654 munmap(indata,statbuf.st_size); | |
655 munmap(db,dbH->dbSize); | |
656 close(infid); | |
657 return; | |
658 } | |
659 | |
660 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key)); | |
661 | |
662 off_t insertoffset = dbH->length;// Store current state | |
663 | |
664 // Check times status and insert times from file | |
665 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double)); | |
666 double *timesdata = timesTable + 2*indexoffset; | |
667 | |
668 if(2*(indexoffset + numVectors) > timesTableLength) { | |
669 error("out of space for times", key); | |
670 } | |
671 | |
672 if (usingTimes) { | |
673 insertTimeStamps(numVectors, timesFile, timesdata); | |
674 } | |
675 | |
676 double *powerdata = powerTable + indexoffset; | |
677 insertPowerData(numVectors, powerfd, powerdata); | |
678 | |
679 // Increment file count | |
680 dbH->numFiles++; | |
681 | |
682 // Update Header information | |
683 dbH->length+=(statbuf.st_size-sizeof(int)); | |
684 | |
685 // Update track to file index map | |
686 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned)); | |
687 | |
688 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int)); | |
689 | |
690 // Norm the vectors on input if the database is already L2 normed | |
691 if(dbH->flags & O2_FLAG_L2NORM) | |
692 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append | |
693 | |
694 // Report status | |
695 status(dbName); | |
696 if(verbosity) { | |
697 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors " | |
698 << (statbuf.st_size-sizeof(int)) << " bytes." << endl; | |
699 } | |
700 | |
701 // Copy the header back to the database | |
702 memcpy (db, dbH, sizeof(dbTableHeaderT)); | |
703 | |
704 // CLEAN UP | |
705 munmap(indata,statbuf.st_size); | |
706 close(infid); | |
707 } | |
708 | |
709 void audioDB::insertTimeStamps(unsigned numVectors, ifstream *timesFile, double *timesdata) { | |
710 assert(usingTimes); | |
711 | |
712 unsigned numtimes = 0; | |
713 | |
714 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) { | |
715 dbH->flags=dbH->flags|O2_FLAG_TIMES; | |
716 } else if(!(dbH->flags & O2_FLAG_TIMES)) { | |
717 error("Timestamp file used with non-timestamped database", timesFileName); | |
718 } | |
719 | |
720 if(!timesFile->is_open()) { | |
721 error("problem opening times file on timestamped database", timesFileName); | |
722 } | |
723 | |
724 double timepoint, next; | |
725 *timesFile >> timepoint; | |
726 if (timesFile->eof()) { | |
727 error("no entries in times file", timesFileName); | |
728 } | |
729 numtimes++; | |
730 do { | |
731 *timesFile >> next; | |
732 if (timesFile->eof()) { | |
733 break; | |
734 } | |
735 numtimes++; | |
736 timesdata[0] = timepoint; | |
737 timepoint = (timesdata[1] = next); | |
738 timesdata += 2; | |
739 } while (numtimes < numVectors + 1); | |
740 | |
741 if (numtimes < numVectors + 1) { | |
742 error("too few timepoints in times file", timesFileName); | |
743 } | |
744 | |
745 *timesFile >> next; | |
746 if (!timesFile->eof()) { | |
747 error("too many timepoints in times file", timesFileName); | |
748 } | |
749 } | |
750 | |
751 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) { | |
752 if (usingPower) { | |
753 if (!(dbH->flags & O2_FLAG_POWER)) { | |
754 error("Cannot insert power data on non-power DB", dbName); | |
755 } | |
756 | |
757 int one; | |
758 unsigned int count; | |
759 | |
760 count = read(powerfd, &one, sizeof(unsigned int)); | |
761 if (count != sizeof(unsigned int)) { | |
762 error("powerfd read failed", "int", "read"); | |
763 } | |
764 if (one != 1) { | |
765 error("dimensionality of power file not 1", powerFileName); | |
766 } | |
767 | |
768 // FIXME: should check that the powerfile is the right size for | |
769 // this. -- CSR, 2007-10-30 | |
770 count = read(powerfd, powerdata, numVectors * sizeof(double)); | |
771 if (count != numVectors * sizeof(double)) { | |
772 error("powerfd read failed", "double", "read"); | |
773 } | |
774 } | |
775 } | |
776 | |
777 void audioDB::batchinsert(const char* dbName, const char* inFile) { | |
778 | |
779 forWrite = true; | |
780 initDBHeader(dbName); | |
781 | |
782 if(!key) | |
783 key=inFile; | |
784 ifstream *filesIn = 0; | |
785 ifstream *keysIn = 0; | |
786 ifstream* thisTimesFile = 0; | |
787 int thispowerfd = 0; | |
788 | |
789 if(!(filesIn = new ifstream(inFile))) | |
790 error("Could not open batch in file", inFile); | |
791 if(key && key!=inFile) | |
792 if(!(keysIn = new ifstream(key))) | |
793 error("Could not open batch key file",key); | |
794 | |
795 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
796 error("Must use timestamps with timestamped database","use --times"); | |
797 | |
798 if(!usingPower && (dbH->flags & O2_FLAG_POWER)) | |
799 error("Must use power with power-enabled database", dbName); | |
800 | |
801 unsigned totalVectors=0; | |
802 char *thisKey = new char[MAXSTR]; | |
803 char *thisFile = new char[MAXSTR]; | |
804 char *thisTimesFileName = new char[MAXSTR]; | |
805 char *thisPowerFileName = new char[MAXSTR]; | |
806 | |
807 do{ | |
808 filesIn->getline(thisFile,MAXSTR); | |
809 if(key && key!=inFile) | |
810 keysIn->getline(thisKey,MAXSTR); | |
811 else | |
812 thisKey = thisFile; | |
813 if(usingTimes) | |
814 timesFile->getline(thisTimesFileName,MAXSTR); | |
815 if(usingPower) | |
816 powerFile->getline(thisPowerFileName, MAXSTR); | |
817 | |
818 if(filesIn->eof()) | |
819 break; | |
820 | |
821 initInputFile(thisFile); | |
822 | |
823 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) { | |
824 error("batchinsert failed: no more room in database", thisFile); | |
825 } | |
826 | |
827 // Linear scan of filenames check for pre-existing feature | |
828 unsigned alreadyInserted=0; | |
829 | |
830 for(unsigned k=0; k<dbH->numFiles; k++) | |
831 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey)+1)==0){ | |
832 alreadyInserted=1; | |
833 break; | |
834 } | |
835 | |
836 if(alreadyInserted){ | |
837 if(verbosity) { | |
838 cerr << "Warning: key already exists in database:" << thisKey << endl; | |
839 } | |
840 } | |
841 else{ | |
842 | |
843 // Make a track index table of features to file indexes | |
844 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
845 if(!numVectors){ | |
846 if(verbosity) { | |
847 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; | |
848 } | |
849 } | |
850 else{ | |
851 if(usingTimes){ | |
852 if(timesFile->eof()) { | |
853 error("not enough timestamp files in timesList", timesFileName); | |
854 } | |
855 thisTimesFile = new ifstream(thisTimesFileName,ios::in); | |
856 if(!thisTimesFile->is_open()) { | |
857 error("Cannot open timestamp file", thisTimesFileName); | |
858 } | |
859 off_t insertoffset = dbH->length; | |
860 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double)); | |
861 double *timesdata = timesTable + 2*indexoffset; | |
862 if(2*(indexoffset + numVectors) > timesTableLength) { | |
863 error("out of space for times", key); | |
864 } | |
865 insertTimeStamps(numVectors, thisTimesFile, timesdata); | |
866 if(thisTimesFile) | |
867 delete thisTimesFile; | |
868 } | |
869 | |
870 if (usingPower) { | |
871 if(powerFile->eof()) { | |
872 error("not enough power files in powerList", powerFileName); | |
873 } | |
874 thispowerfd = open(thisPowerFileName, O_RDONLY); | |
875 if (thispowerfd < 0) { | |
876 error("failed to open power file", thisPowerFileName); | |
877 } | |
878 unsigned insertoffset = dbH->length; | |
879 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double)); | |
880 double *powerdata = powerTable + poweroffset; | |
881 insertPowerData(numVectors, thispowerfd, powerdata); | |
882 if (0 < thispowerfd) { | |
883 close(thispowerfd); | |
884 } | |
885 } | |
886 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); | |
887 | |
888 off_t insertoffset = dbH->length;// Store current state | |
889 | |
890 // Increment file count | |
891 dbH->numFiles++; | |
892 | |
893 // Update Header information | |
894 dbH->length+=(statbuf.st_size-sizeof(int)); | |
895 | |
896 // Update track to file index map | |
897 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); | |
898 | |
899 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int)); | |
900 | |
901 // Norm the vectors on input if the database is already L2 normed | |
902 if(dbH->flags & O2_FLAG_L2NORM) | |
903 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append | |
904 | |
905 totalVectors+=numVectors; | |
906 | |
907 // Copy the header back to the database | |
908 memcpy (db, dbH, sizeof(dbTableHeaderT)); | |
909 } | |
910 } | |
911 // CLEAN UP | |
912 munmap(indata,statbuf.st_size); | |
913 close(infid); | |
914 }while(!filesIn->eof()); | |
915 | |
916 if(verbosity) { | |
917 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " | |
918 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl; | |
919 } | |
920 | |
921 // Report status | |
922 status(dbName); | |
923 } | |
924 | |
925 // FIXME: this can't propagate the sequence length argument (used for | |
926 // dudCount). See adb__status() definition for the other half of | |
927 // this. -- CSR, 2007-10-01 | |
928 void audioDB::ws_status(const char*dbName, char* hostport){ | |
929 struct soap soap; | |
930 adb__statusResponse adbStatusResponse; | |
931 | |
932 // Query an existing adb database | |
933 soap_init(&soap); | |
934 if(soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResponse)==SOAP_OK) { | |
935 cout << "numFiles = " << adbStatusResponse.result.numFiles << endl; | |
936 cout << "dim = " << adbStatusResponse.result.dim << endl; | |
937 cout << "length = " << adbStatusResponse.result.length << endl; | |
938 cout << "dudCount = " << adbStatusResponse.result.dudCount << endl; | |
939 cout << "nullCount = " << adbStatusResponse.result.nullCount << endl; | |
940 cout << "flags = " << adbStatusResponse.result.flags << endl; | |
941 } else { | |
942 soap_print_fault(&soap,stderr); | |
943 } | |
944 | |
945 soap_destroy(&soap); | |
946 soap_end(&soap); | |
947 soap_done(&soap); | |
948 } | |
949 | |
950 void audioDB::ws_query(const char*dbName, const char *trackKey, const char* hostport){ | |
951 struct soap soap; | |
952 adb__queryResponse adbQueryResponse; | |
953 | |
954 soap_init(&soap); | |
955 if(soap_call_adb__query(&soap,hostport,NULL, | |
956 (char*)dbName,(char*)trackKey,(char*)trackFileName,(char*)timesFileName, | |
957 queryType, queryPoint, pointNN, trackNN, sequenceLength, adbQueryResponse)==SOAP_OK){ | |
958 //std::cerr << "result list length:" << adbQueryResponse.result.__sizeRlist << std::endl; | |
959 for(int i=0; i<adbQueryResponse.result.__sizeRlist; i++) | |
960 std::cout << adbQueryResponse.result.Rlist[i] << " " << adbQueryResponse.result.Dist[i] | |
961 << " " << adbQueryResponse.result.Qpos[i] << " " << adbQueryResponse.result.Spos[i] << std::endl; | |
962 } | |
963 else | |
964 soap_print_fault(&soap,stderr); | |
965 | |
966 soap_destroy(&soap); | |
967 soap_end(&soap); | |
968 soap_done(&soap); | |
969 | |
970 } | |
971 | |
972 | |
973 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){ | 352 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){ |
974 if(!dbH) | 353 if(!dbH) |
975 initTables(dbName, 0); | 354 initTables(dbName, 0); |
976 | 355 |
977 unsigned dudCount=0; | 356 unsigned dudCount=0; |
985 } | 364 } |
986 | 365 |
987 if(adbStatusResponse == 0) { | 366 if(adbStatusResponse == 0) { |
988 | 367 |
989 // Update Header information | 368 // Update Header information |
990 cout << "num files:" << dbH->numFiles << endl; | 369 std::cout << "num files:" << dbH->numFiles << std::endl; |
991 cout << "data dim:" << dbH->dim <<endl; | 370 std::cout << "data dim:" << dbH->dim <<std::endl; |
992 if(dbH->dim>0){ | 371 if(dbH->dim>0){ |
993 cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl; | 372 std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<std::endl; |
994 cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << endl; | 373 std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl; |
995 } | 374 } |
996 cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; | 375 std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; |
997 cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << | 376 std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << |
998 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; | 377 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; |
999 cout << "flags:" << dbH->flags << endl; | 378 std::cout << "flags:" << dbH->flags << std::endl; |
1000 | 379 |
1001 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; | 380 std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl; |
1002 } else { | 381 } else { |
1003 adbStatusResponse->result.numFiles = dbH->numFiles; | 382 adbStatusResponse->result.numFiles = dbH->numFiles; |
1004 adbStatusResponse->result.dim = dbH->dim; | 383 adbStatusResponse->result.dim = dbH->dim; |
1005 adbStatusResponse->result.length = dbH->length; | 384 adbStatusResponse->result.length = dbH->length; |
1006 adbStatusResponse->result.dudCount = dudCount; | 385 adbStatusResponse->result.dudCount = dudCount; |
1007 adbStatusResponse->result.nullCount = nullCount; | 386 adbStatusResponse->result.nullCount = nullCount; |
1008 adbStatusResponse->result.flags = dbH->flags; | 387 adbStatusResponse->result.flags = dbH->flags; |
1009 } | 388 } |
1010 } | |
1011 | |
1012 void audioDB::dump(const char* dbName){ | |
1013 if(!dbH) { | |
1014 initTables(dbName, 0); | |
1015 } | |
1016 | |
1017 if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) { | |
1018 error("error making output directory", output, "mkdir"); | |
1019 } | |
1020 | |
1021 char *cwd = new char[PATH_MAX]; | |
1022 | |
1023 if ((getcwd(cwd, PATH_MAX)) == 0) { | |
1024 error("error getting working directory", "", "getcwd"); | |
1025 } | |
1026 | |
1027 if((chdir(output)) < 0) { | |
1028 error("error changing working directory", output, "chdir"); | |
1029 } | |
1030 | |
1031 int fLfd, tLfd = 0, pLfd = 0, kLfd; | |
1032 FILE *fLFile, *tLFile = 0, *pLFile = 0, *kLFile; | |
1033 | |
1034 if ((fLfd = open("featureList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | |
1035 error("error creating featureList file", "featureList.txt", "open"); | |
1036 } | |
1037 | |
1038 int times = dbH->flags & O2_FLAG_TIMES; | |
1039 if (times) { | |
1040 if ((tLfd = open("timesList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | |
1041 error("error creating timesList file", "timesList.txt", "open"); | |
1042 } | |
1043 } | |
1044 | |
1045 int power = dbH->flags & O2_FLAG_POWER; | |
1046 if (power) { | |
1047 if ((pLfd = open("powerList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | |
1048 error("error creating powerList file", "powerList.txt", "open"); | |
1049 } | |
1050 } | |
1051 | |
1052 if ((kLfd = open("keyList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | |
1053 error("error creating keyList file", "keyList.txt", "open"); | |
1054 } | |
1055 | |
1056 /* can these fail? I sincerely hope not. */ | |
1057 fLFile = fdopen(fLfd, "w"); | |
1058 if (times) { | |
1059 tLFile = fdopen(tLfd, "w"); | |
1060 } | |
1061 if (power) { | |
1062 pLFile = fdopen(pLfd, "w"); | |
1063 } | |
1064 kLFile = fdopen(kLfd, "w"); | |
1065 | |
1066 char *fName = new char[256]; | |
1067 int ffd, pfd; | |
1068 FILE *tFile; | |
1069 unsigned pos = 0; | |
1070 lseek(dbfid, dbH->dataOffset, SEEK_SET); | |
1071 double *data_buffer; | |
1072 size_t data_buffer_size; | |
1073 for(unsigned k = 0; k < dbH->numFiles; k++) { | |
1074 fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLESIZE); | |
1075 snprintf(fName, 256, "%05d.features", k); | |
1076 if ((ffd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | |
1077 error("error creating feature file", fName, "open"); | |
1078 } | |
1079 if ((write(ffd, &dbH->dim, sizeof(uint32_t))) < 0) { | |
1080 error("error writing dimensions", fName, "write"); | |
1081 } | |
1082 | |
1083 /* FIXME: this repeated malloc()/free() of data buffers is | |
1084 inefficient. */ | |
1085 data_buffer_size = trackTable[k] * dbH->dim * sizeof(double); | |
1086 | |
1087 { | |
1088 void *tmp = malloc(data_buffer_size); | |
1089 if (tmp == NULL) { | |
1090 error("error allocating data buffer"); | |
1091 } | |
1092 data_buffer = (double *) tmp; | |
1093 } | |
1094 | |
1095 if ((read(dbfid, data_buffer, data_buffer_size)) != (ssize_t) data_buffer_size) { | |
1096 error("error reading data", fName, "read"); | |
1097 } | |
1098 | |
1099 if ((write(ffd, data_buffer, data_buffer_size)) < 0) { | |
1100 error("error writing data", fName, "write"); | |
1101 } | |
1102 | |
1103 free(data_buffer); | |
1104 | |
1105 fprintf(fLFile, "%s\n", fName); | |
1106 close(ffd); | |
1107 | |
1108 if (times) { | |
1109 snprintf(fName, 256, "%05d.times", k); | |
1110 tFile = fopen(fName, "w"); | |
1111 for(unsigned i = 0; i < trackTable[k]; i++) { | |
1112 // KLUDGE: specifying 16 digits of precision after the decimal | |
1113 // point is (but check this!) sufficient to uniquely identify | |
1114 // doubles; however, that will cause ugliness, as that's | |
1115 // vastly too many for most values of interest. Moving to %a | |
1116 // here and scanf() in the timesFile reading might fix this. | |
1117 // -- CSR, 2007-10-19 | |
1118 fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*i)); | |
1119 } | |
1120 fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*trackTable[k]-1)); | |
1121 | |
1122 fprintf(tLFile, "%s\n", fName); | |
1123 } | |
1124 | |
1125 if (power) { | |
1126 uint32_t one = 1; | |
1127 snprintf(fName, 256, "%05d.power", k); | |
1128 if ((pfd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { | |
1129 error("error creating power file", fName, "open"); | |
1130 } | |
1131 if ((write(pfd, &one, sizeof(uint32_t))) < 0) { | |
1132 error("error writing one", fName, "write"); | |
1133 } | |
1134 if ((write(pfd, powerTable + pos, trackTable[k] * sizeof(double))) < 0) { | |
1135 error("error writing data", fName, "write"); | |
1136 } | |
1137 fprintf(pLFile, "%s\n", fName); | |
1138 close(pfd); | |
1139 } | |
1140 | |
1141 pos += trackTable[k]; | |
1142 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; | |
1143 } | |
1144 | |
1145 FILE *scriptFile; | |
1146 scriptFile = fopen("restore.sh", "w"); | |
1147 fprintf(scriptFile, "\ | |
1148 #! /bin/sh\n\ | |
1149 #\n\ | |
1150 # usage: AUDIODB=/path/to/audioDB sh ./restore.sh <newdb>\n\ | |
1151 \n\ | |
1152 if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n\ | |
1153 if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\ | |
1154 \"${AUDIODB}\" -d \"$1\" -N --size=%d\n", (int) (dbH->dbSize / 1000000)); | |
1155 if(dbH->flags & O2_FLAG_L2NORM) { | |
1156 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n"); | |
1157 } | |
1158 if(power) { | |
1159 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -P\n"); | |
1160 } | |
1161 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt"); | |
1162 if(times) { | |
1163 fprintf(scriptFile, " -T timesList.txt"); | |
1164 } | |
1165 if(power) { | |
1166 fprintf(scriptFile, " -W powerList.txt"); | |
1167 } | |
1168 fprintf(scriptFile, "\n"); | |
1169 fclose(scriptFile); | |
1170 | |
1171 if((chdir(cwd)) < 0) { | |
1172 error("error changing working directory", cwd, "chdir"); | |
1173 } | |
1174 | |
1175 fclose(fLFile); | |
1176 if(times) { | |
1177 fclose(tLFile); | |
1178 } | |
1179 if(power) { | |
1180 fclose(pLFile); | |
1181 } | |
1182 fclose(kLFile); | |
1183 delete[] fName; | |
1184 | |
1185 status(dbName); | |
1186 } | 389 } |
1187 | 390 |
1188 void audioDB::l2norm(const char* dbName) { | 391 void audioDB::l2norm(const char* dbName) { |
1189 forWrite = true; | 392 forWrite = true; |
1190 initTables(dbName, 0); | 393 initTables(dbName, 0); |
1207 } | 410 } |
1208 dbH->flags |= O2_FLAG_POWER; | 411 dbH->flags |= O2_FLAG_POWER; |
1209 memcpy(db, dbH, O2_HEADERSIZE); | 412 memcpy(db, dbH, O2_HEADERSIZE); |
1210 } | 413 } |
1211 | 414 |
1212 bool audioDB::powers_acceptable(double p1, double p2) { | |
1213 if (use_absolute_threshold) { | |
1214 if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) { | |
1215 return false; | |
1216 } | |
1217 } | |
1218 if (use_relative_threshold) { | |
1219 if (fabs(p1-p2) > fabs(relative_threshold)) { | |
1220 return false; | |
1221 } | |
1222 } | |
1223 return true; | |
1224 } | |
1225 | |
1226 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ | |
1227 switch(queryType){ | |
1228 case O2_POINT_QUERY: | |
1229 pointQuery(dbName, inFile, adbQueryResponse); | |
1230 break; | |
1231 case O2_SEQUENCE_QUERY: | |
1232 if(radius==0) | |
1233 trackSequenceQueryNN(dbName, inFile, adbQueryResponse); | |
1234 else | |
1235 trackSequenceQueryRad(dbName, inFile, adbQueryResponse); | |
1236 break; | |
1237 case O2_TRACK_QUERY: | |
1238 trackPointQuery(dbName, inFile, adbQueryResponse); | |
1239 break; | |
1240 default: | |
1241 error("unrecognized queryType in query()"); | |
1242 | |
1243 } | |
1244 } | |
1245 | |
1246 //return ordinal position of key in keyTable | |
1247 unsigned audioDB::getKeyPos(char* key){ | |
1248 for(unsigned k=0; k<dbH->numFiles; k++) | |
1249 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) | |
1250 return k; | |
1251 error("Key not found",key); | |
1252 return O2_ERR_KEYNOTFOUND; | |
1253 } | |
1254 | |
1255 // Basic point query engine | |
1256 void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { | |
1257 | |
1258 initTables(dbName, inFile); | |
1259 | |
1260 // For each input vector, find the closest pointNN matching output vectors and report | |
1261 // we use stdout in this stub version | |
1262 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
1263 | |
1264 double* query = (double*)(indata+sizeof(int)); | |
1265 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); | |
1266 double* data = dataBuf; | |
1267 double* queryCopy = 0; | |
1268 | |
1269 if( dbH->flags & O2_FLAG_L2NORM ){ | |
1270 // Make a copy of the query | |
1271 queryCopy = new double[numVectors*dbH->dim]; | |
1272 qNorm = new double[numVectors]; | |
1273 assert(queryCopy&&qNorm); | |
1274 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | |
1275 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | |
1276 query = queryCopy; | |
1277 } | |
1278 | |
1279 // Make temporary dynamic memory for results | |
1280 assert(pointNN>0 && pointNN<=O2_MAXNN); | |
1281 double distances[pointNN]; | |
1282 unsigned qIndexes[pointNN]; | |
1283 unsigned sIndexes[pointNN]; | |
1284 for(unsigned k=0; k<pointNN; k++){ | |
1285 distances[k]=-DBL_MAX; | |
1286 qIndexes[k]=~0; | |
1287 sIndexes[k]=~0; | |
1288 } | |
1289 | |
1290 unsigned j=numVectors; | |
1291 unsigned k,l,n; | |
1292 double thisDist; | |
1293 | |
1294 unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double)); | |
1295 double meanQdur = 0; | |
1296 double *timesdata = 0; | |
1297 double *querydurs = 0; | |
1298 double *dbdurs = 0; | |
1299 | |
1300 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | |
1301 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | |
1302 usingTimes=0; | |
1303 } | |
1304 | |
1305 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
1306 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | |
1307 | |
1308 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | |
1309 timesdata = new double[2*numVectors]; | |
1310 querydurs = new double[numVectors]; | |
1311 insertTimeStamps(numVectors, timesFile, timesdata); | |
1312 // Calculate durations of points | |
1313 for(k=0; k<numVectors-1; k++){ | |
1314 querydurs[k]=timesdata[2*k+1]-timesdata[2*k]; | |
1315 meanQdur+=querydurs[k]; | |
1316 } | |
1317 meanQdur/=k; | |
1318 // Individual exhaustive timepoint durations | |
1319 dbdurs = new double[totalVecs]; | |
1320 for(k=0; k<totalVecs-1; k++) { | |
1321 dbdurs[k]=timesTable[2*k+1]-timesTable[2*k]; | |
1322 } | |
1323 } | |
1324 | |
1325 if(usingQueryPoint) | |
1326 if(queryPoint>numVectors-1) | |
1327 error("queryPoint > numVectors in query"); | |
1328 else{ | |
1329 if(verbosity>1) { | |
1330 cerr << "query point: " << queryPoint << endl; cerr.flush(); | |
1331 } | |
1332 query=query+queryPoint*dbH->dim; | |
1333 numVectors=queryPoint+1; | |
1334 j=1; | |
1335 } | |
1336 | |
1337 gettimeofday(&tv1, NULL); | |
1338 while(j--){ // query | |
1339 data=dataBuf; | |
1340 k=totalVecs; // number of database vectors | |
1341 while(k--){ // database | |
1342 thisDist=0; | |
1343 l=dbH->dim; | |
1344 double* q=query; | |
1345 while(l--) | |
1346 thisDist+=*q++**data++; | |
1347 if(!usingTimes || | |
1348 (usingTimes | |
1349 && fabs(dbdurs[totalVecs-k-1]-querydurs[numVectors-j-1])<querydurs[numVectors-j-1]*timesTol)){ | |
1350 n=pointNN; | |
1351 while(n--){ | |
1352 if(thisDist>=distances[n]){ | |
1353 if((n==0 || thisDist<=distances[n-1])){ | |
1354 // Copy all values above up the queue | |
1355 for( l=pointNN-1 ; l >= n+1 ; l--){ | |
1356 distances[l]=distances[l-1]; | |
1357 qIndexes[l]=qIndexes[l-1]; | |
1358 sIndexes[l]=sIndexes[l-1]; | |
1359 } | |
1360 distances[n]=thisDist; | |
1361 qIndexes[n]=numVectors-j-1; | |
1362 sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1; | |
1363 break; | |
1364 } | |
1365 } | |
1366 else | |
1367 break; | |
1368 } | |
1369 } | |
1370 } | |
1371 // Move query pointer to next query point | |
1372 query+=dbH->dim; | |
1373 } | |
1374 | |
1375 gettimeofday(&tv2, NULL); | |
1376 if(verbosity>1) { | |
1377 cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | |
1378 } | |
1379 | |
1380 if(adbQueryResponse==0){ | |
1381 // Output answer | |
1382 // Loop over nearest neighbours | |
1383 for(k=0; k < pointNN; k++){ | |
1384 // Scan for key | |
1385 unsigned cumTrack=0; | |
1386 for(l=0 ; l<dbH->numFiles; l++){ | |
1387 cumTrack+=trackTable[l]; | |
1388 if(sIndexes[k]<cumTrack){ | |
1389 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " " | |
1390 << sIndexes[k]+trackTable[l]-cumTrack << endl; | |
1391 break; | |
1392 } | |
1393 } | |
1394 } | |
1395 } | |
1396 else{ // Process Web Services Query | |
1397 int listLen; | |
1398 for(k = 0; k < pointNN; k++) { | |
1399 if(distances[k] == -DBL_MAX) | |
1400 break; | |
1401 } | |
1402 listLen = k; | |
1403 | |
1404 adbQueryResponse->result.__sizeRlist=listLen; | |
1405 adbQueryResponse->result.__sizeDist=listLen; | |
1406 adbQueryResponse->result.__sizeQpos=listLen; | |
1407 adbQueryResponse->result.__sizeSpos=listLen; | |
1408 adbQueryResponse->result.Rlist= new char*[listLen]; | |
1409 adbQueryResponse->result.Dist = new double[listLen]; | |
1410 adbQueryResponse->result.Qpos = new unsigned int[listLen]; | |
1411 adbQueryResponse->result.Spos = new unsigned int[listLen]; | |
1412 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ | |
1413 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; | |
1414 adbQueryResponse->result.Dist[k]=distances[k]; | |
1415 adbQueryResponse->result.Qpos[k]=qIndexes[k]; | |
1416 unsigned cumTrack=0; | |
1417 for(l=0 ; l<dbH->numFiles; l++){ | |
1418 cumTrack+=trackTable[l]; | |
1419 if(sIndexes[k]<cumTrack){ | |
1420 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); | |
1421 break; | |
1422 } | |
1423 } | |
1424 adbQueryResponse->result.Spos[k]=sIndexes[k]+trackTable[l]-cumTrack; | |
1425 } | |
1426 } | |
1427 | |
1428 // Clean up | |
1429 if(queryCopy) | |
1430 delete queryCopy; | |
1431 if(qNorm) | |
1432 delete qNorm; | |
1433 if(timesdata) | |
1434 delete[] timesdata; | |
1435 if(querydurs) | |
1436 delete[] querydurs; | |
1437 if(dbdurs) | |
1438 delete dbdurs; | |
1439 } | |
1440 | |
1441 // trackPointQuery | |
1442 // return the trackNN closest tracks to the query track | |
1443 // uses average of pointNN points per track | |
1444 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { | |
1445 initTables(dbName, inFile); | |
1446 | |
1447 // For each input vector, find the closest pointNN matching output vectors and report | |
1448 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
1449 double* query = (double*)(indata+sizeof(int)); | |
1450 double* data; | |
1451 double* queryCopy = 0; | |
1452 | |
1453 if( dbH->flags & O2_FLAG_L2NORM ){ | |
1454 // Make a copy of the query | |
1455 queryCopy = new double[numVectors*dbH->dim]; | |
1456 qNorm = new double[numVectors]; | |
1457 assert(queryCopy&&qNorm); | |
1458 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | |
1459 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | |
1460 query = queryCopy; | |
1461 } | |
1462 | |
1463 assert(pointNN>0 && pointNN<=O2_MAXNN); | |
1464 assert(trackNN>0 && trackNN<=O2_MAXNN); | |
1465 | |
1466 // Make temporary dynamic memory for results | |
1467 double trackDistances[trackNN]; | |
1468 unsigned trackIDs[trackNN]; | |
1469 unsigned trackQIndexes[trackNN]; | |
1470 unsigned trackSIndexes[trackNN]; | |
1471 | |
1472 double distances[pointNN]; | |
1473 unsigned qIndexes[pointNN]; | |
1474 unsigned sIndexes[pointNN]; | |
1475 | |
1476 unsigned j=numVectors; // number of query points | |
1477 unsigned k,l,n, track, trackOffset=0, processedTracks=0; | |
1478 double thisDist; | |
1479 | |
1480 for(k=0; k<pointNN; k++){ | |
1481 distances[k]=-DBL_MAX; | |
1482 qIndexes[k]=~0; | |
1483 sIndexes[k]=~0; | |
1484 } | |
1485 | |
1486 for(k=0; k<trackNN; k++){ | |
1487 trackDistances[k]=-DBL_MAX; | |
1488 trackQIndexes[k]=~0; | |
1489 trackSIndexes[k]=~0; | |
1490 trackIDs[k]=~0; | |
1491 } | |
1492 | |
1493 double meanQdur = 0; | |
1494 double *timesdata = 0; | |
1495 double *querydurs = 0; | |
1496 double *meanDBdur = 0; | |
1497 | |
1498 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | |
1499 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | |
1500 usingTimes=0; | |
1501 } | |
1502 | |
1503 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
1504 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | |
1505 | |
1506 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | |
1507 timesdata = new double[2*numVectors]; | |
1508 querydurs = new double[numVectors]; | |
1509 insertTimeStamps(numVectors, timesFile, timesdata); | |
1510 // Calculate durations of points | |
1511 for(k=0; k<numVectors-1; k++) { | |
1512 querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; | |
1513 meanQdur += querydurs[k]; | |
1514 } | |
1515 meanQdur/=k; | |
1516 meanDBdur = new double[dbH->numFiles]; | |
1517 for(k=0; k<dbH->numFiles; k++){ | |
1518 meanDBdur[k]=0.0; | |
1519 for(j=0; j<trackTable[k]-1 ; j++) { | |
1520 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j]; | |
1521 } | |
1522 meanDBdur[k]/=j; | |
1523 } | |
1524 } | |
1525 | |
1526 if(usingQueryPoint) | |
1527 if(queryPoint>numVectors-1) | |
1528 error("queryPoint > numVectors in query"); | |
1529 else{ | |
1530 if(verbosity>1) { | |
1531 cerr << "query point: " << queryPoint << endl; cerr.flush(); | |
1532 } | |
1533 query=query+queryPoint*dbH->dim; | |
1534 numVectors=queryPoint+1; | |
1535 } | |
1536 | |
1537 // build track offset table | |
1538 off_t *trackOffsetTable = new off_t[dbH->numFiles]; | |
1539 unsigned cumTrack=0; | |
1540 off_t trackIndexOffset; | |
1541 for(k=0; k<dbH->numFiles;k++){ | |
1542 trackOffsetTable[k]=cumTrack; | |
1543 cumTrack+=trackTable[k]*dbH->dim; | |
1544 } | |
1545 | |
1546 char nextKey[MAXSTR]; | |
1547 | |
1548 gettimeofday(&tv1, NULL); | |
1549 | |
1550 size_t data_buffer_size = 0; | |
1551 double *data_buffer = 0; | |
1552 lseek(dbfid, dbH->dataOffset, SEEK_SET); | |
1553 | |
1554 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ | |
1555 | |
1556 trackOffset = trackOffsetTable[track]; // numDoubles offset | |
1557 | |
1558 // get trackID from file if using a control file | |
1559 if(trackFile) { | |
1560 trackFile->getline(nextKey,MAXSTR); | |
1561 if(!trackFile->eof()) { | |
1562 track = getKeyPos(nextKey); | |
1563 trackOffset = trackOffsetTable[track]; | |
1564 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); | |
1565 } else { | |
1566 break; | |
1567 } | |
1568 } | |
1569 | |
1570 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset | |
1571 | |
1572 if(verbosity>7) { | |
1573 cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush(); | |
1574 } | |
1575 | |
1576 if(dbH->flags & O2_FLAG_L2NORM) | |
1577 usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy; | |
1578 else | |
1579 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); | |
1580 if(usingQueryPoint) | |
1581 j=1; | |
1582 else | |
1583 j=numVectors; | |
1584 | |
1585 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) { | |
1586 if(data_buffer) { | |
1587 free(data_buffer); | |
1588 } | |
1589 { | |
1590 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; | |
1591 void *tmp = malloc(data_buffer_size); | |
1592 if (tmp == NULL) { | |
1593 error("error allocating data buffer"); | |
1594 } | |
1595 data_buffer = (double *) tmp; | |
1596 } | |
1597 } | |
1598 | |
1599 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); | |
1600 | |
1601 while(j--){ | |
1602 k=trackTable[track]; // number of vectors in track | |
1603 data=data_buffer; // data for track | |
1604 while(k--){ | |
1605 thisDist=0; | |
1606 l=dbH->dim; | |
1607 double* q=query; | |
1608 while(l--) | |
1609 thisDist+=*q++**data++; | |
1610 if(!usingTimes || | |
1611 (usingTimes | |
1612 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){ | |
1613 n=pointNN; | |
1614 while(n--){ | |
1615 if(thisDist>=distances[n]){ | |
1616 if((n==0 || thisDist<=distances[n-1])){ | |
1617 // Copy all values above up the queue | |
1618 for( l=pointNN-1 ; l > n ; l--){ | |
1619 distances[l]=distances[l-1]; | |
1620 qIndexes[l]=qIndexes[l-1]; | |
1621 sIndexes[l]=sIndexes[l-1]; | |
1622 } | |
1623 distances[n]=thisDist; | |
1624 qIndexes[n]=numVectors-j-1; | |
1625 sIndexes[n]=trackTable[track]-k-1; | |
1626 break; | |
1627 } | |
1628 } | |
1629 else | |
1630 break; | |
1631 } | |
1632 } | |
1633 } // track | |
1634 // Move query pointer to next query point | |
1635 query+=dbH->dim; | |
1636 } // query | |
1637 // Take the average of this track's distance | |
1638 // Test the track distances | |
1639 thisDist=0; | |
1640 for (n = 0; n < pointNN; n++) { | |
1641 if (distances[n] == -DBL_MAX) break; | |
1642 thisDist += distances[n]; | |
1643 } | |
1644 thisDist /= n; | |
1645 | |
1646 n=trackNN; | |
1647 while(n--){ | |
1648 if(thisDist>=trackDistances[n]){ | |
1649 if((n==0 || thisDist<=trackDistances[n-1])){ | |
1650 // Copy all values above up the queue | |
1651 for( l=trackNN-1 ; l > n ; l--){ | |
1652 trackDistances[l]=trackDistances[l-1]; | |
1653 trackQIndexes[l]=trackQIndexes[l-1]; | |
1654 trackSIndexes[l]=trackSIndexes[l-1]; | |
1655 trackIDs[l]=trackIDs[l-1]; | |
1656 } | |
1657 trackDistances[n]=thisDist; | |
1658 trackQIndexes[n]=qIndexes[0]; | |
1659 trackSIndexes[n]=sIndexes[0]; | |
1660 trackIDs[n]=track; | |
1661 break; | |
1662 } | |
1663 } | |
1664 else | |
1665 break; | |
1666 } | |
1667 for(unsigned k=0; k<pointNN; k++){ | |
1668 distances[k]=-DBL_MAX; | |
1669 qIndexes[k]=~0; | |
1670 sIndexes[k]=~0; | |
1671 } | |
1672 } // tracks | |
1673 | |
1674 free(data_buffer); | |
1675 | |
1676 gettimeofday(&tv2, NULL); | |
1677 | |
1678 if(verbosity>1) { | |
1679 cerr << endl << "processed tracks :" << processedTracks | |
1680 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | |
1681 } | |
1682 | |
1683 if(adbQueryResponse==0){ | |
1684 if(verbosity>1) { | |
1685 cerr<<endl; | |
1686 } | |
1687 // Output answer | |
1688 // Loop over nearest neighbours | |
1689 for(k=0; k < min(trackNN,processedTracks); k++) | |
1690 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE | |
1691 << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << endl; | |
1692 } | |
1693 else{ // Process Web Services Query | |
1694 int listLen = min(trackNN, processedTracks); | |
1695 adbQueryResponse->result.__sizeRlist=listLen; | |
1696 adbQueryResponse->result.__sizeDist=listLen; | |
1697 adbQueryResponse->result.__sizeQpos=listLen; | |
1698 adbQueryResponse->result.__sizeSpos=listLen; | |
1699 adbQueryResponse->result.Rlist= new char*[listLen]; | |
1700 adbQueryResponse->result.Dist = new double[listLen]; | |
1701 adbQueryResponse->result.Qpos = new unsigned int[listLen]; | |
1702 adbQueryResponse->result.Spos = new unsigned int[listLen]; | |
1703 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ | |
1704 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; | |
1705 adbQueryResponse->result.Dist[k]=trackDistances[k]; | |
1706 adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; | |
1707 adbQueryResponse->result.Spos[k]=trackSIndexes[k]; | |
1708 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); | |
1709 } | |
1710 } | |
1711 | |
1712 // Clean up | |
1713 if(trackOffsetTable) | |
1714 delete trackOffsetTable; | |
1715 if(queryCopy) | |
1716 delete queryCopy; | |
1717 if(qNorm) | |
1718 delete qNorm; | |
1719 if(timesdata) | |
1720 delete[] timesdata; | |
1721 if(querydurs) | |
1722 delete[] querydurs; | |
1723 if(meanDBdur) | |
1724 delete meanDBdur; | |
1725 } | |
1726 | |
1727 // This is a common pattern in sequence queries: what we are doing is | |
1728 // taking a window of length seqlen over a buffer of length length, | |
1729 // and placing the sum of the elements in that window in the first | |
1730 // element of the window: thus replacing all but the last seqlen | |
1731 // elements in the buffer the corresponding windowed sum. | |
1732 void audioDB::sequence_sum(double *buffer, int length, int seqlen) { | |
1733 double tmp1, tmp2, *ps; | |
1734 int j, w; | |
1735 | |
1736 tmp1 = *buffer; | |
1737 j = 1; | |
1738 w = seqlen - 1; | |
1739 while(w--) { | |
1740 *buffer += buffer[j++]; | |
1741 } | |
1742 ps = buffer + 1; | |
1743 w = length - seqlen; // +1 - 1 | |
1744 while(w--) { | |
1745 tmp2 = *ps; | |
1746 *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1); | |
1747 tmp1 = tmp2; | |
1748 ps++; | |
1749 } | |
1750 } | |
1751 | |
1752 void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) { | |
1753 int w = length - seqlen + 1; | |
1754 while(w--) { | |
1755 *buffer = sqrt(*buffer); | |
1756 buffer++; | |
1757 } | |
1758 } | |
1759 | |
1760 void audioDB::sequence_average(double *buffer, int length, int seqlen) { | |
1761 int w = length - seqlen + 1; | |
1762 while(w--) { | |
1763 *buffer /= seqlen; | |
1764 buffer++; | |
1765 } | |
1766 } | |
1767 | |
1768 // k nearest-neighbor (k-NN) search between query and target tracks | |
1769 // efficient implementation based on matched filter | |
1770 // assumes normed shingles | |
1771 // outputs distances of retrieved shingles, max retrieved = pointNN shingles per track | |
1772 void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ | |
1773 | |
1774 initTables(dbName, inFile); | |
1775 | |
1776 // For each input vector, find the closest pointNN matching output vectors and report | |
1777 // we use stdout in this stub version | |
1778 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
1779 double* query = (double*)(indata+sizeof(int)); | |
1780 double* queryCopy = 0; | |
1781 | |
1782 if(!(dbH->flags & O2_FLAG_L2NORM) ) | |
1783 error("Database must be L2 normed for sequence query","use -L2NORM"); | |
1784 | |
1785 if(numVectors<sequenceLength) | |
1786 error("Query shorter than requested sequence length", "maybe use -l"); | |
1787 | |
1788 if(verbosity>1) { | |
1789 cerr << "performing norms ... "; cerr.flush(); | |
1790 } | |
1791 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); | |
1792 | |
1793 // Make a copy of the query | |
1794 queryCopy = new double[numVectors*dbH->dim]; | |
1795 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | |
1796 qNorm = new double[numVectors]; | |
1797 sNorm = new double[dbVectors]; | |
1798 assert(qNorm&&sNorm&&queryCopy&&sequenceLength); | |
1799 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | |
1800 query = queryCopy; | |
1801 | |
1802 // Make norm measurements relative to sequenceLength | |
1803 unsigned w = sequenceLength-1; | |
1804 unsigned i,j; | |
1805 | |
1806 // Copy the L2 norm values to core to avoid disk random access later on | |
1807 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); | |
1808 double* qnPtr = qNorm; | |
1809 double* snPtr = sNorm; | |
1810 | |
1811 double *sPower = 0, *qPower = 0; | |
1812 double *spPtr = 0, *qpPtr = 0; | |
1813 | |
1814 if (usingPower) { | |
1815 if (!(dbH->flags & O2_FLAG_POWER)) { | |
1816 error("database not power-enabled", dbName); | |
1817 } | |
1818 sPower = new double[dbVectors]; | |
1819 spPtr = sPower; | |
1820 memcpy(sPower, powerTable, dbVectors * sizeof(double)); | |
1821 } | |
1822 | |
1823 for(i=0; i<dbH->numFiles; i++){ | |
1824 if(trackTable[i]>=sequenceLength) { | |
1825 sequence_sum(snPtr, trackTable[i], sequenceLength); | |
1826 sequence_sqrt(snPtr, trackTable[i], sequenceLength); | |
1827 | |
1828 if (usingPower) { | |
1829 sequence_sum(spPtr, trackTable[i], sequenceLength); | |
1830 sequence_average(spPtr, trackTable[i], sequenceLength); | |
1831 } | |
1832 } | |
1833 snPtr += trackTable[i]; | |
1834 if (usingPower) { | |
1835 spPtr += trackTable[i]; | |
1836 } | |
1837 } | |
1838 | |
1839 sequence_sum(qnPtr, numVectors, sequenceLength); | |
1840 sequence_sqrt(qnPtr, numVectors, sequenceLength); | |
1841 | |
1842 if (usingPower) { | |
1843 qPower = new double[numVectors]; | |
1844 qpPtr = qPower; | |
1845 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { | |
1846 error("error seeking to data", powerFileName, "lseek"); | |
1847 } | |
1848 int count = read(powerfd, qPower, numVectors * sizeof(double)); | |
1849 if (count == -1) { | |
1850 error("error reading data", powerFileName, "read"); | |
1851 } | |
1852 if ((unsigned) count != numVectors * sizeof(double)) { | |
1853 error("short read", powerFileName); | |
1854 } | |
1855 | |
1856 sequence_sum(qpPtr, numVectors, sequenceLength); | |
1857 sequence_average(qpPtr, numVectors, sequenceLength); | |
1858 } | |
1859 | |
1860 if(verbosity>1) { | |
1861 cerr << "done." << endl; | |
1862 } | |
1863 | |
1864 if(verbosity>1) { | |
1865 cerr << "matching tracks..." << endl; | |
1866 } | |
1867 | |
1868 assert(pointNN>0 && pointNN<=O2_MAXNN); | |
1869 assert(trackNN>0 && trackNN<=O2_MAXNN); | |
1870 | |
1871 // Make temporary dynamic memory for results | |
1872 double trackDistances[trackNN]; | |
1873 unsigned trackIDs[trackNN]; | |
1874 unsigned trackQIndexes[trackNN]; | |
1875 unsigned trackSIndexes[trackNN]; | |
1876 | |
1877 double distances[pointNN]; | |
1878 unsigned qIndexes[pointNN]; | |
1879 unsigned sIndexes[pointNN]; | |
1880 | |
1881 | |
1882 unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; | |
1883 double thisDist; | |
1884 | |
1885 for(k=0; k<pointNN; k++){ | |
1886 distances[k]=1.0e6; | |
1887 qIndexes[k]=~0; | |
1888 sIndexes[k]=~0; | |
1889 } | |
1890 | |
1891 for(k=0; k<trackNN; k++){ | |
1892 trackDistances[k]=1.0e6; | |
1893 trackQIndexes[k]=~0; | |
1894 trackSIndexes[k]=~0; | |
1895 trackIDs[k]=~0; | |
1896 } | |
1897 | |
1898 // Timestamp and durations processing | |
1899 double meanQdur = 0; | |
1900 double *timesdata = 0; | |
1901 double *querydurs = 0; | |
1902 double *meanDBdur = 0; | |
1903 | |
1904 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | |
1905 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | |
1906 usingTimes=0; | |
1907 } | |
1908 | |
1909 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
1910 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | |
1911 | |
1912 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | |
1913 timesdata = new double[2*numVectors]; | |
1914 querydurs = new double[numVectors]; | |
1915 | |
1916 insertTimeStamps(numVectors, timesFile, timesdata); | |
1917 // Calculate durations of points | |
1918 for(k=0; k<numVectors-1; k++) { | |
1919 querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; | |
1920 meanQdur += querydurs[k]; | |
1921 } | |
1922 meanQdur/=k; | |
1923 if(verbosity>1) { | |
1924 cerr << "mean query file duration: " << meanQdur << endl; | |
1925 } | |
1926 meanDBdur = new double[dbH->numFiles]; | |
1927 assert(meanDBdur); | |
1928 for(k=0; k<dbH->numFiles; k++){ | |
1929 meanDBdur[k]=0.0; | |
1930 for(j=0; j<trackTable[k]-1 ; j++) { | |
1931 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j]; | |
1932 } | |
1933 meanDBdur[k]/=j; | |
1934 } | |
1935 } | |
1936 | |
1937 if(usingQueryPoint) | |
1938 if(queryPoint>numVectors || queryPoint>numVectors-wL+1) | |
1939 error("queryPoint > numVectors-wL+1 in query"); | |
1940 else{ | |
1941 if(verbosity>1) { | |
1942 cerr << "query point: " << queryPoint << endl; cerr.flush(); | |
1943 } | |
1944 query = query + queryPoint * dbH->dim; | |
1945 qnPtr = qnPtr + queryPoint; | |
1946 if (usingPower) { | |
1947 qpPtr = qpPtr + queryPoint; | |
1948 } | |
1949 numVectors=wL; | |
1950 } | |
1951 | |
1952 double ** D = 0; // Differences query and target | |
1953 double ** DD = 0; // Matched filter distance | |
1954 | |
1955 D = new double*[numVectors]; | |
1956 assert(D); | |
1957 DD = new double*[numVectors]; | |
1958 assert(DD); | |
1959 | |
1960 gettimeofday(&tv1, NULL); | |
1961 unsigned processedTracks = 0; | |
1962 unsigned successfulTracks=0; | |
1963 | |
1964 double* qp; | |
1965 double* sp; | |
1966 double* dp; | |
1967 | |
1968 // build track offset table | |
1969 off_t *trackOffsetTable = new off_t[dbH->numFiles]; | |
1970 unsigned cumTrack=0; | |
1971 off_t trackIndexOffset; | |
1972 for(k=0; k<dbH->numFiles;k++){ | |
1973 trackOffsetTable[k]=cumTrack; | |
1974 cumTrack+=trackTable[k]*dbH->dim; | |
1975 } | |
1976 | |
1977 char nextKey [MAXSTR]; | |
1978 | |
1979 // chi^2 statistics | |
1980 double sampleCount = 0; | |
1981 double sampleSum = 0; | |
1982 double logSampleSum = 0; | |
1983 double minSample = 1e9; | |
1984 double maxSample = 0; | |
1985 | |
1986 // Track loop | |
1987 size_t data_buffer_size = 0; | |
1988 double *data_buffer = 0; | |
1989 lseek(dbfid, dbH->dataOffset, SEEK_SET); | |
1990 | |
1991 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) { | |
1992 | |
1993 trackOffset = trackOffsetTable[track]; // numDoubles offset | |
1994 | |
1995 // get trackID from file if using a control file | |
1996 if(trackFile) { | |
1997 trackFile->getline(nextKey,MAXSTR); | |
1998 if(!trackFile->eof()) { | |
1999 track = getKeyPos(nextKey); | |
2000 trackOffset = trackOffsetTable[track]; | |
2001 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); | |
2002 } else { | |
2003 break; | |
2004 } | |
2005 } | |
2006 | |
2007 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset | |
2008 | |
2009 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) { | |
2010 if(data_buffer) { | |
2011 free(data_buffer); | |
2012 } | |
2013 { | |
2014 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; | |
2015 void *tmp = malloc(data_buffer_size); | |
2016 if (tmp == NULL) { | |
2017 error("error allocating data buffer"); | |
2018 } | |
2019 data_buffer = (double *) tmp; | |
2020 } | |
2021 } | |
2022 | |
2023 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); | |
2024 | |
2025 if(sequenceLength<=trackTable[track]){ // test for short sequences | |
2026 | |
2027 if(verbosity>7) { | |
2028 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); | |
2029 } | |
2030 | |
2031 // Sum products matrix | |
2032 for(j=0; j<numVectors;j++){ | |
2033 D[j]=new double[trackTable[track]]; | |
2034 assert(D[j]); | |
2035 | |
2036 } | |
2037 | |
2038 // Matched filter matrix | |
2039 for(j=0; j<numVectors;j++){ | |
2040 DD[j]=new double[trackTable[track]]; | |
2041 assert(DD[j]); | |
2042 } | |
2043 | |
2044 // Dot product | |
2045 for(j=0; j<numVectors; j++) | |
2046 for(k=0; k<trackTable[track]; k++){ | |
2047 qp=query+j*dbH->dim; | |
2048 sp=data_buffer+k*dbH->dim; | |
2049 DD[j][k]=0.0; // Initialize matched filter array | |
2050 dp=&D[j][k]; // point to correlation cell j,k | |
2051 *dp=0.0; // initialize correlation cell | |
2052 l=dbH->dim; // size of vectors | |
2053 while(l--) | |
2054 *dp+=*qp++**sp++; | |
2055 } | |
2056 | |
2057 // Matched Filter | |
2058 // HOP SIZE == 1 | |
2059 double* spd; | |
2060 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop | |
2061 for(w=0; w<wL; w++) | |
2062 for(j=0; j<numVectors-w; j++){ | |
2063 sp=DD[j]; | |
2064 spd=D[j+w]+w; | |
2065 k=trackTable[track]-w; | |
2066 while(k--) | |
2067 *sp+++=*spd++; | |
2068 } | |
2069 } | |
2070 | |
2071 else{ // HOP_SIZE != 1 | |
2072 for(w=0; w<wL; w++) | |
2073 for(j=0; j<numVectors-w; j+=HOP_SIZE){ | |
2074 sp=DD[j]; | |
2075 spd=D[j+w]+w; | |
2076 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){ | |
2077 *sp+=*spd; | |
2078 sp+=HOP_SIZE; | |
2079 spd+=HOP_SIZE; | |
2080 } | |
2081 } | |
2082 } | |
2083 | |
2084 if(verbosity>3 && usingTimes) { | |
2085 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl; | |
2086 cerr.flush(); | |
2087 } | |
2088 | |
2089 if(!usingTimes || | |
2090 (usingTimes | |
2091 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){ | |
2092 | |
2093 if(verbosity>3 && usingTimes) { | |
2094 cerr << "within duration tolerance." << endl; | |
2095 cerr.flush(); | |
2096 } | |
2097 | |
2098 // Search for minimum distance by shingles (concatenated vectors) | |
2099 for(j=0;j<=numVectors-wL;j+=HOP_SIZE) | |
2100 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ | |
2101 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; | |
2102 if(verbosity>9) { | |
2103 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl; | |
2104 } | |
2105 // Gather chi^2 statistics | |
2106 if(thisDist<minSample) | |
2107 minSample=thisDist; | |
2108 else if(thisDist>maxSample) | |
2109 maxSample=thisDist; | |
2110 if(thisDist>1e-9){ | |
2111 sampleCount++; | |
2112 sampleSum+=thisDist; | |
2113 logSampleSum+=log(thisDist); | |
2114 } | |
2115 | |
2116 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); | |
2117 // Power test | |
2118 if (usingPower) { | |
2119 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) { | |
2120 thisDist = 1000000.0; | |
2121 } | |
2122 } | |
2123 | |
2124 // k-NN match algorithm | |
2125 m=pointNN; | |
2126 while(m--){ | |
2127 if(thisDist<=distances[m]) | |
2128 if(m==0 || thisDist>=distances[m-1]){ | |
2129 // Shuffle distances up the list | |
2130 for(l=pointNN-1; l>m; l--){ | |
2131 distances[l]=distances[l-1]; | |
2132 qIndexes[l]=qIndexes[l-1]; | |
2133 sIndexes[l]=sIndexes[l-1]; | |
2134 } | |
2135 distances[m]=thisDist; | |
2136 if(usingQueryPoint) | |
2137 qIndexes[m]=queryPoint; | |
2138 else | |
2139 qIndexes[m]=j; | |
2140 sIndexes[m]=k; | |
2141 break; | |
2142 } | |
2143 } | |
2144 } | |
2145 // Calculate the mean of the N-Best matches | |
2146 thisDist=0.0; | |
2147 for(m=0; m<pointNN; m++) { | |
2148 if (distances[m] == 1000000.0) break; | |
2149 thisDist+=distances[m]; | |
2150 } | |
2151 thisDist/=m; | |
2152 | |
2153 // Let's see the distances then... | |
2154 if(verbosity>3) { | |
2155 cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl; | |
2156 } | |
2157 | |
2158 | |
2159 // All the track stuff goes here | |
2160 n=trackNN; | |
2161 while(n--){ | |
2162 if(thisDist<=trackDistances[n]){ | |
2163 if((n==0 || thisDist>=trackDistances[n-1])){ | |
2164 // Copy all values above up the queue | |
2165 for( l=trackNN-1 ; l > n ; l--){ | |
2166 trackDistances[l]=trackDistances[l-1]; | |
2167 trackQIndexes[l]=trackQIndexes[l-1]; | |
2168 trackSIndexes[l]=trackSIndexes[l-1]; | |
2169 trackIDs[l]=trackIDs[l-1]; | |
2170 } | |
2171 trackDistances[n]=thisDist; | |
2172 trackQIndexes[n]=qIndexes[0]; | |
2173 trackSIndexes[n]=sIndexes[0]; | |
2174 successfulTracks++; | |
2175 trackIDs[n]=track; | |
2176 break; | |
2177 } | |
2178 } | |
2179 else | |
2180 break; | |
2181 } | |
2182 } // Duration match | |
2183 | |
2184 // Clean up current track | |
2185 if(D!=NULL){ | |
2186 for(j=0; j<numVectors; j++) | |
2187 delete[] D[j]; | |
2188 } | |
2189 | |
2190 if(DD!=NULL){ | |
2191 for(j=0; j<numVectors; j++) | |
2192 delete[] DD[j]; | |
2193 } | |
2194 } | |
2195 // per-track reset array values | |
2196 for(unsigned k=0; k<pointNN; k++){ | |
2197 distances[k]=1.0e6; | |
2198 qIndexes[k]=~0; | |
2199 sIndexes[k]=~0; | |
2200 } | |
2201 } | |
2202 | |
2203 free(data_buffer); | |
2204 | |
2205 gettimeofday(&tv2,NULL); | |
2206 if(verbosity>1) { | |
2207 cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" | |
2208 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | |
2209 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum | |
2210 << " minSample: " << minSample << " maxSample: " << maxSample << endl; | |
2211 } | |
2212 if(adbQueryResponse==0){ | |
2213 if(verbosity>1) { | |
2214 cerr<<endl; | |
2215 } | |
2216 // Output answer | |
2217 // Loop over nearest neighbours | |
2218 for(k=0; k < min(trackNN,successfulTracks); k++) | |
2219 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << " " | |
2220 << trackQIndexes[k] << " " << trackSIndexes[k] << endl; | |
2221 } | |
2222 else{ // Process Web Services Query | |
2223 int listLen = min(trackNN, processedTracks); | |
2224 adbQueryResponse->result.__sizeRlist=listLen; | |
2225 adbQueryResponse->result.__sizeDist=listLen; | |
2226 adbQueryResponse->result.__sizeQpos=listLen; | |
2227 adbQueryResponse->result.__sizeSpos=listLen; | |
2228 adbQueryResponse->result.Rlist= new char*[listLen]; | |
2229 adbQueryResponse->result.Dist = new double[listLen]; | |
2230 adbQueryResponse->result.Qpos = new unsigned int[listLen]; | |
2231 adbQueryResponse->result.Spos = new unsigned int[listLen]; | |
2232 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ | |
2233 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; | |
2234 adbQueryResponse->result.Dist[k]=trackDistances[k]; | |
2235 adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; | |
2236 adbQueryResponse->result.Spos[k]=trackSIndexes[k]; | |
2237 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); | |
2238 } | |
2239 } | |
2240 | |
2241 // Clean up | |
2242 if(trackOffsetTable) | |
2243 delete[] trackOffsetTable; | |
2244 if(queryCopy) | |
2245 delete[] queryCopy; | |
2246 if(qNorm) | |
2247 delete[] qNorm; | |
2248 if(sNorm) | |
2249 delete[] sNorm; | |
2250 if(qPower) | |
2251 delete[] qPower; | |
2252 if(sPower) | |
2253 delete[] sPower; | |
2254 if(D) | |
2255 delete[] D; | |
2256 if(DD) | |
2257 delete[] DD; | |
2258 if(timesdata) | |
2259 delete[] timesdata; | |
2260 if(querydurs) | |
2261 delete[] querydurs; | |
2262 if(meanDBdur) | |
2263 delete[] meanDBdur; | |
2264 } | |
2265 | |
2266 // Radius search between query and target tracks | |
2267 // efficient implementation based on matched filter | |
2268 // assumes normed shingles | |
2269 // outputs count of retrieved shingles, max retreived = one shingle per query shingle per track | |
2270 void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ | |
2271 | |
2272 initTables(dbName, inFile); | |
2273 | |
2274 // For each input vector, find the closest pointNN matching output vectors and report | |
2275 // we use stdout in this stub version | |
2276 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | |
2277 double* query = (double*)(indata+sizeof(int)); | |
2278 double* queryCopy = 0; | |
2279 | |
2280 if(!(dbH->flags & O2_FLAG_L2NORM) ) | |
2281 error("Database must be L2 normed for sequence query","use -l2norm"); | |
2282 | |
2283 if(verbosity>1) { | |
2284 cerr << "performing norms ... "; cerr.flush(); | |
2285 } | |
2286 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); | |
2287 | |
2288 // Make a copy of the query | |
2289 queryCopy = new double[numVectors*dbH->dim]; | |
2290 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | |
2291 qNorm = new double[numVectors]; | |
2292 sNorm = new double[dbVectors]; | |
2293 assert(qNorm&&sNorm&&queryCopy&&sequenceLength); | |
2294 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | |
2295 query = queryCopy; | |
2296 | |
2297 // Make norm measurements relative to sequenceLength | |
2298 unsigned w = sequenceLength-1; | |
2299 unsigned i,j; | |
2300 | |
2301 // Copy the L2 norm values to core to avoid disk random access later on | |
2302 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); | |
2303 double* snPtr = sNorm; | |
2304 double* qnPtr = qNorm; | |
2305 | |
2306 double *sPower = 0, *qPower = 0; | |
2307 double *spPtr = 0, *qpPtr = 0; | |
2308 | |
2309 if (usingPower) { | |
2310 if(!(dbH->flags & O2_FLAG_POWER)) { | |
2311 error("database not power-enabled", dbName); | |
2312 } | |
2313 sPower = new double[dbVectors]; | |
2314 spPtr = sPower; | |
2315 memcpy(sPower, powerTable, dbVectors * sizeof(double)); | |
2316 } | |
2317 | |
2318 for(i=0; i<dbH->numFiles; i++){ | |
2319 if(trackTable[i]>=sequenceLength) { | |
2320 sequence_sum(snPtr, trackTable[i], sequenceLength); | |
2321 sequence_sqrt(snPtr, trackTable[i], sequenceLength); | |
2322 if (usingPower) { | |
2323 sequence_sum(spPtr, trackTable[i], sequenceLength); | |
2324 sequence_average(spPtr, trackTable[i], sequenceLength); | |
2325 } | |
2326 } | |
2327 snPtr += trackTable[i]; | |
2328 if (usingPower) { | |
2329 spPtr += trackTable[i]; | |
2330 } | |
2331 } | |
2332 | |
2333 sequence_sum(qnPtr, numVectors, sequenceLength); | |
2334 sequence_sqrt(qnPtr, numVectors, sequenceLength); | |
2335 | |
2336 if (usingPower) { | |
2337 qPower = new double[numVectors]; | |
2338 qpPtr = qPower; | |
2339 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { | |
2340 error("error seeking to data", powerFileName, "lseek"); | |
2341 } | |
2342 int count = read(powerfd, qPower, numVectors * sizeof(double)); | |
2343 if (count == -1) { | |
2344 error("error reading data", powerFileName, "read"); | |
2345 } | |
2346 if ((unsigned) count != numVectors * sizeof(double)) { | |
2347 error("short read", powerFileName); | |
2348 } | |
2349 | |
2350 sequence_sum(qpPtr, numVectors, sequenceLength); | |
2351 sequence_average(qpPtr, numVectors, sequenceLength); | |
2352 } | |
2353 | |
2354 if(verbosity>1) { | |
2355 cerr << "done." << endl; | |
2356 } | |
2357 | |
2358 if(verbosity>1) { | |
2359 cerr << "matching tracks..." << endl; | |
2360 } | |
2361 | |
2362 assert(pointNN>0 && pointNN<=O2_MAXNN); | |
2363 assert(trackNN>0 && trackNN<=O2_MAXNN); | |
2364 | |
2365 // Make temporary dynamic memory for results | |
2366 double trackDistances[trackNN]; | |
2367 unsigned trackIDs[trackNN]; | |
2368 unsigned trackQIndexes[trackNN]; | |
2369 unsigned trackSIndexes[trackNN]; | |
2370 | |
2371 double distances[pointNN]; | |
2372 unsigned qIndexes[pointNN]; | |
2373 unsigned sIndexes[pointNN]; | |
2374 | |
2375 | |
2376 unsigned k,l,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; | |
2377 double thisDist; | |
2378 | |
2379 for(k=0; k<pointNN; k++){ | |
2380 distances[k]=0.0; | |
2381 qIndexes[k]=~0; | |
2382 sIndexes[k]=~0; | |
2383 } | |
2384 | |
2385 for(k=0; k<trackNN; k++){ | |
2386 trackDistances[k]=0.0; | |
2387 trackQIndexes[k]=~0; | |
2388 trackSIndexes[k]=~0; | |
2389 trackIDs[k]=~0; | |
2390 } | |
2391 | |
2392 // Timestamp and durations processing | |
2393 double meanQdur = 0; | |
2394 double *timesdata = 0; | |
2395 double *querydurs = 0; | |
2396 double *meanDBdur = 0; | |
2397 | |
2398 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | |
2399 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | |
2400 usingTimes=0; | |
2401 } | |
2402 | |
2403 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | |
2404 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | |
2405 | |
2406 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | |
2407 timesdata = new double[2*numVectors]; | |
2408 querydurs = new double[numVectors]; | |
2409 | |
2410 insertTimeStamps(numVectors, timesFile, timesdata); | |
2411 // Calculate durations of points | |
2412 for(k=0; k<numVectors-1; k++){ | |
2413 querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; | |
2414 meanQdur += querydurs[k]; | |
2415 } | |
2416 meanQdur/=k; | |
2417 if(verbosity>1) { | |
2418 cerr << "mean query file duration: " << meanQdur << endl; | |
2419 } | |
2420 meanDBdur = new double[dbH->numFiles]; | |
2421 assert(meanDBdur); | |
2422 for(k=0; k<dbH->numFiles; k++){ | |
2423 meanDBdur[k]=0.0; | |
2424 for(j=0; j<trackTable[k]-1 ; j++) { | |
2425 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j]; | |
2426 } | |
2427 meanDBdur[k]/=j; | |
2428 } | |
2429 } | |
2430 | |
2431 if(usingQueryPoint) | |
2432 if(queryPoint>numVectors || queryPoint>numVectors-wL+1) | |
2433 error("queryPoint > numVectors-wL+1 in query"); | |
2434 else{ | |
2435 if(verbosity>1) { | |
2436 cerr << "query point: " << queryPoint << endl; cerr.flush(); | |
2437 } | |
2438 query = query + queryPoint*dbH->dim; | |
2439 qnPtr = qnPtr + queryPoint; | |
2440 if (usingPower) { | |
2441 qpPtr = qpPtr + queryPoint; | |
2442 } | |
2443 numVectors=wL; | |
2444 } | |
2445 | |
2446 double ** D = 0; // Differences query and target | |
2447 double ** DD = 0; // Matched filter distance | |
2448 | |
2449 D = new double*[numVectors]; | |
2450 assert(D); | |
2451 DD = new double*[numVectors]; | |
2452 assert(DD); | |
2453 | |
2454 gettimeofday(&tv1, NULL); | |
2455 unsigned processedTracks = 0; | |
2456 unsigned successfulTracks=0; | |
2457 | |
2458 double* qp; | |
2459 double* sp; | |
2460 double* dp; | |
2461 | |
2462 // build track offset table | |
2463 off_t *trackOffsetTable = new off_t[dbH->numFiles]; | |
2464 unsigned cumTrack=0; | |
2465 off_t trackIndexOffset; | |
2466 for(k=0; k<dbH->numFiles;k++){ | |
2467 trackOffsetTable[k]=cumTrack; | |
2468 cumTrack+=trackTable[k]*dbH->dim; | |
2469 } | |
2470 | |
2471 char nextKey [MAXSTR]; | |
2472 | |
2473 // chi^2 statistics | |
2474 double sampleCount = 0; | |
2475 double sampleSum = 0; | |
2476 double logSampleSum = 0; | |
2477 double minSample = 1e9; | |
2478 double maxSample = 0; | |
2479 | |
2480 // Track loop | |
2481 size_t data_buffer_size = 0; | |
2482 double *data_buffer = 0; | |
2483 lseek(dbfid, dbH->dataOffset, SEEK_SET); | |
2484 | |
2485 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ | |
2486 | |
2487 trackOffset = trackOffsetTable[track]; // numDoubles offset | |
2488 | |
2489 // get trackID from file if using a control file | |
2490 if(trackFile) { | |
2491 trackFile->getline(nextKey,MAXSTR); | |
2492 if(!trackFile->eof()) { | |
2493 track = getKeyPos(nextKey); | |
2494 trackOffset = trackOffsetTable[track]; | |
2495 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); | |
2496 } else { | |
2497 break; | |
2498 } | |
2499 } | |
2500 | |
2501 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset | |
2502 | |
2503 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) { | |
2504 if(data_buffer) { | |
2505 free(data_buffer); | |
2506 } | |
2507 { | |
2508 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; | |
2509 void *tmp = malloc(data_buffer_size); | |
2510 if (tmp == NULL) { | |
2511 error("error allocating data buffer"); | |
2512 } | |
2513 data_buffer = (double *) tmp; | |
2514 } | |
2515 } | |
2516 | |
2517 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); | |
2518 | |
2519 if(sequenceLength<=trackTable[track]){ // test for short sequences | |
2520 | |
2521 if(verbosity>7) { | |
2522 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); | |
2523 } | |
2524 | |
2525 // Sum products matrix | |
2526 for(j=0; j<numVectors;j++){ | |
2527 D[j]=new double[trackTable[track]]; | |
2528 assert(D[j]); | |
2529 | |
2530 } | |
2531 | |
2532 // Matched filter matrix | |
2533 for(j=0; j<numVectors;j++){ | |
2534 DD[j]=new double[trackTable[track]]; | |
2535 assert(DD[j]); | |
2536 } | |
2537 | |
2538 // Dot product | |
2539 for(j=0; j<numVectors; j++) | |
2540 for(k=0; k<trackTable[track]; k++){ | |
2541 qp=query+j*dbH->dim; | |
2542 sp=data_buffer+k*dbH->dim; | |
2543 DD[j][k]=0.0; // Initialize matched filter array | |
2544 dp=&D[j][k]; // point to correlation cell j,k | |
2545 *dp=0.0; // initialize correlation cell | |
2546 l=dbH->dim; // size of vectors | |
2547 while(l--) | |
2548 *dp+=*qp++**sp++; | |
2549 } | |
2550 | |
2551 // Matched Filter | |
2552 // HOP SIZE == 1 | |
2553 double* spd; | |
2554 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop | |
2555 for(w=0; w<wL; w++) | |
2556 for(j=0; j<numVectors-w; j++){ | |
2557 sp=DD[j]; | |
2558 spd=D[j+w]+w; | |
2559 k=trackTable[track]-w; | |
2560 while(k--) | |
2561 *sp+++=*spd++; | |
2562 } | |
2563 } | |
2564 | |
2565 else{ // HOP_SIZE != 1 | |
2566 for(w=0; w<wL; w++) | |
2567 for(j=0; j<numVectors-w; j+=HOP_SIZE){ | |
2568 sp=DD[j]; | |
2569 spd=D[j+w]+w; | |
2570 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){ | |
2571 *sp+=*spd; | |
2572 sp+=HOP_SIZE; | |
2573 spd+=HOP_SIZE; | |
2574 } | |
2575 } | |
2576 } | |
2577 | |
2578 if(verbosity>3 && usingTimes) { | |
2579 cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl; | |
2580 cerr.flush(); | |
2581 } | |
2582 | |
2583 if(!usingTimes || | |
2584 (usingTimes | |
2585 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){ | |
2586 | |
2587 if(verbosity>3 && usingTimes) { | |
2588 cerr << "within duration tolerance." << endl; | |
2589 cerr.flush(); | |
2590 } | |
2591 | |
2592 // Search for minimum distance by shingles (concatenated vectors) | |
2593 for(j=0;j<=numVectors-wL;j+=HOP_SIZE) | |
2594 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ | |
2595 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; | |
2596 if(verbosity>9) { | |
2597 cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl; | |
2598 } | |
2599 // Gather chi^2 statistics | |
2600 if(thisDist<minSample) | |
2601 minSample=thisDist; | |
2602 else if(thisDist>maxSample) | |
2603 maxSample=thisDist; | |
2604 if(thisDist>1e-9){ | |
2605 sampleCount++; | |
2606 sampleSum+=thisDist; | |
2607 logSampleSum+=log(thisDist); | |
2608 } | |
2609 | |
2610 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); | |
2611 // Power test | |
2612 if (usingPower) { | |
2613 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) { | |
2614 thisDist = 1000000.0; | |
2615 } | |
2616 } | |
2617 | |
2618 if(thisDist>=0 && thisDist<=radius){ | |
2619 distances[0]++; // increment count | |
2620 break; // only need one track point per query point | |
2621 } | |
2622 } | |
2623 // How many points were below threshold ? | |
2624 thisDist=distances[0]; | |
2625 | |
2626 // Let's see the distances then... | |
2627 if(verbosity>3) { | |
2628 cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl; | |
2629 } | |
2630 | |
2631 // All the track stuff goes here | |
2632 n=trackNN; | |
2633 while(n--){ | |
2634 if(thisDist>trackDistances[n]){ | |
2635 if((n==0 || thisDist<=trackDistances[n-1])){ | |
2636 // Copy all values above up the queue | |
2637 for( l=trackNN-1 ; l > n ; l--){ | |
2638 trackDistances[l]=trackDistances[l-1]; | |
2639 trackQIndexes[l]=trackQIndexes[l-1]; | |
2640 trackSIndexes[l]=trackSIndexes[l-1]; | |
2641 trackIDs[l]=trackIDs[l-1]; | |
2642 } | |
2643 trackDistances[n]=thisDist; | |
2644 trackQIndexes[n]=qIndexes[0]; | |
2645 trackSIndexes[n]=sIndexes[0]; | |
2646 successfulTracks++; | |
2647 trackIDs[n]=track; | |
2648 break; | |
2649 } | |
2650 } | |
2651 else | |
2652 break; | |
2653 } | |
2654 } // Duration match | |
2655 | |
2656 // Clean up current track | |
2657 if(D!=NULL){ | |
2658 for(j=0; j<numVectors; j++) | |
2659 delete[] D[j]; | |
2660 } | |
2661 | |
2662 if(DD!=NULL){ | |
2663 for(j=0; j<numVectors; j++) | |
2664 delete[] DD[j]; | |
2665 } | |
2666 } | |
2667 // per-track reset array values | |
2668 for(unsigned k=0; k<pointNN; k++){ | |
2669 distances[k]=0.0; | |
2670 qIndexes[k]=~0; | |
2671 sIndexes[k]=~0; | |
2672 } | |
2673 } | |
2674 | |
2675 free(data_buffer); | |
2676 | |
2677 gettimeofday(&tv2,NULL); | |
2678 if(verbosity>1) { | |
2679 cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" | |
2680 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | |
2681 cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum | |
2682 << " minSample: " << minSample << " maxSample: " << maxSample << endl; | |
2683 } | |
2684 | |
2685 if(adbQueryResponse==0){ | |
2686 if(verbosity>1) { | |
2687 cerr<<endl; | |
2688 } | |
2689 // Output answer | |
2690 // Loop over nearest neighbours | |
2691 for(k=0; k < min(trackNN,successfulTracks); k++) | |
2692 cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << endl; | |
2693 } | |
2694 else{ // Process Web Services Query | |
2695 int listLen = min(trackNN, processedTracks); | |
2696 adbQueryResponse->result.__sizeRlist=listLen; | |
2697 adbQueryResponse->result.__sizeDist=listLen; | |
2698 adbQueryResponse->result.__sizeQpos=listLen; | |
2699 adbQueryResponse->result.__sizeSpos=listLen; | |
2700 adbQueryResponse->result.Rlist= new char*[listLen]; | |
2701 adbQueryResponse->result.Dist = new double[listLen]; | |
2702 adbQueryResponse->result.Qpos = new unsigned int[listLen]; | |
2703 adbQueryResponse->result.Spos = new unsigned int[listLen]; | |
2704 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ | |
2705 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; | |
2706 adbQueryResponse->result.Dist[k]=trackDistances[k]; | |
2707 adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; | |
2708 adbQueryResponse->result.Spos[k]=trackSIndexes[k]; | |
2709 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); | |
2710 } | |
2711 } | |
2712 | |
2713 // Clean up | |
2714 if(trackOffsetTable) | |
2715 delete[] trackOffsetTable; | |
2716 if(queryCopy) | |
2717 delete[] queryCopy; | |
2718 if(qNorm) | |
2719 delete[] qNorm; | |
2720 if(sNorm) | |
2721 delete[] sNorm; | |
2722 if(qPower) | |
2723 delete[] qPower; | |
2724 if(sPower) | |
2725 delete[] sPower; | |
2726 if(D) | |
2727 delete[] D; | |
2728 if(DD) | |
2729 delete[] DD; | |
2730 if(timesdata) | |
2731 delete[] timesdata; | |
2732 if(querydurs) | |
2733 delete[] querydurs; | |
2734 if(meanDBdur) | |
2735 delete[] meanDBdur; | |
2736 } | |
2737 | |
2738 // Unit norm block of features | 415 // Unit norm block of features |
2739 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){ | 416 |
2740 unsigned d; | 417 /* FIXME: in fact this does not unit norm a block of features, it just |
2741 double L2, *p; | 418 records the L2 norms somewhere. unitNorm() does in fact unit norm |
2742 if(verbosity>2) { | 419 a block of features. */ |
2743 cerr << "norming " << n << " vectors...";cerr.flush(); | |
2744 } | |
2745 while(n--){ | |
2746 p=X; | |
2747 L2=0.0; | |
2748 d=dim; | |
2749 while(d--){ | |
2750 L2+=*p**p; | |
2751 p++; | |
2752 } | |
2753 /* L2=sqrt(L2);*/ | |
2754 if(qNorm) | |
2755 *qNorm++=L2; | |
2756 /* | |
2757 oneOverL2 = 1.0/L2; | |
2758 d=dim; | |
2759 while(d--){ | |
2760 *X*=oneOverL2; | |
2761 X++; | |
2762 */ | |
2763 X+=dim; | |
2764 } | |
2765 if(verbosity>2) { | |
2766 cerr << "done..." << endl; | |
2767 } | |
2768 } | |
2769 | |
2770 // Unit norm block of features | |
2771 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){ | 420 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){ |
2772 unsigned d; | 421 unsigned d; |
2773 double *p; | 422 double *p; |
2774 unsigned nn = n; | 423 unsigned nn = n; |
2775 | 424 |
2776 assert(l2normTable); | 425 assert(l2normTable); |
2777 | 426 |
2778 if( !append && (dbH->flags & O2_FLAG_L2NORM) ) | 427 if( !append && (dbH->flags & O2_FLAG_L2NORM) ) |
2779 error("Database is already L2 normed", "automatic norm on insert is enabled"); | 428 error("Database is already L2 normed", "automatic norm on insert is enabled"); |
2780 | 429 |
2781 if(verbosity>2) { | 430 VERB_LOG(2, "norming %u vectors...", n); |
2782 cerr << "norming " << n << " vectors...";cerr.flush(); | |
2783 } | |
2784 | 431 |
2785 double* l2buf = new double[n]; | 432 double* l2buf = new double[n]; |
2786 double* l2ptr = l2buf; | 433 double* l2ptr = l2buf; |
2787 assert(l2buf); | 434 assert(l2buf); |
2788 assert(X); | 435 assert(X); |
2809 offset=0; | 456 offset=0; |
2810 } | 457 } |
2811 memcpy(l2normTable+offset, l2buf, n*sizeof(double)); | 458 memcpy(l2normTable+offset, l2buf, n*sizeof(double)); |
2812 if(l2buf) | 459 if(l2buf) |
2813 delete[] l2buf; | 460 delete[] l2buf; |
2814 if(verbosity>2) { | 461 VERB_LOG(2, " done."); |
2815 cerr << "done..." << endl; | |
2816 } | |
2817 } | |
2818 | |
2819 | |
2820 // Start an audioDB server on the host | |
2821 void audioDB::startServer(){ | |
2822 struct soap soap; | |
2823 int m, s; // master and slave sockets | |
2824 soap_init(&soap); | |
2825 // FIXME: largely this use of SO_REUSEADDR is to make writing (and | |
2826 // running) test cases more convenient, so that multiple test runs | |
2827 // in close succession don't fail because of a bin() error. | |
2828 // Investigate whether there are any potential drawbacks in this, | |
2829 // and also whether there's a better way to write the tests. -- | |
2830 // CSR, 2007-10-03 | |
2831 soap.bind_flags |= SO_REUSEADDR; | |
2832 m = soap_bind(&soap, NULL, port, 100); | |
2833 if (m < 0) | |
2834 soap_print_fault(&soap, stderr); | |
2835 else | |
2836 { | |
2837 fprintf(stderr, "Socket connection successful: master socket = %d\n", m); | |
2838 for (int i = 1; ; i++) | |
2839 { | |
2840 s = soap_accept(&soap); | |
2841 if (s < 0) | |
2842 { | |
2843 soap_print_fault(&soap, stderr); | |
2844 break; | |
2845 } | |
2846 fprintf(stderr, "%d: accepted connection from IP=%lu.%lu.%lu.%lu socket=%d\n", i, | |
2847 (soap.ip >> 24)&0xFF, (soap.ip >> 16)&0xFF, (soap.ip >> 8)&0xFF, soap.ip&0xFF, s); | |
2848 if (soap_serve(&soap) != SOAP_OK) // process RPC request | |
2849 soap_print_fault(&soap, stderr); // print error | |
2850 fprintf(stderr, "request served\n"); | |
2851 soap_destroy(&soap); // clean up class instances | |
2852 soap_end(&soap); // clean up everything and close socket | |
2853 } | |
2854 } | |
2855 soap_done(&soap); // close master socket and detach environment | |
2856 } | |
2857 | |
2858 | |
2859 // web services | |
2860 | |
2861 // SERVER SIDE | |
2862 int adb__status(struct soap* soap, xsd__string dbName, adb__statusResponse &adbStatusResponse){ | |
2863 char* const argv[]={"audioDB",COM_STATUS,"-d",dbName}; | |
2864 const unsigned argc = 4; | |
2865 try { | |
2866 audioDB(argc, argv, &adbStatusResponse); | |
2867 return SOAP_OK; | |
2868 } catch(char *err) { | |
2869 soap_receiver_fault(soap, err, ""); | |
2870 return SOAP_FAULT; | |
2871 } | |
2872 } | |
2873 | |
2874 // Literal translation of command line to web service | |
2875 | |
2876 int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int trackNN, xsd__int seqLen, adb__queryResponse &adbQueryResponse){ | |
2877 char queryType[256]; | |
2878 for(int k=0; k<256; k++) | |
2879 queryType[k]='\0'; | |
2880 if(qType == O2_POINT_QUERY) | |
2881 strncpy(queryType, "point", strlen("point")); | |
2882 else if (qType == O2_SEQUENCE_QUERY) | |
2883 strncpy(queryType, "sequence", strlen("sequence")); | |
2884 else if(qType == O2_TRACK_QUERY) | |
2885 strncpy(queryType,"track", strlen("track")); | |
2886 else | |
2887 strncpy(queryType, "", strlen("")); | |
2888 | |
2889 if(pointNN==0) | |
2890 pointNN=10; | |
2891 if(trackNN==0) | |
2892 trackNN=10; | |
2893 if(seqLen==0) | |
2894 seqLen=16; | |
2895 | |
2896 char qPosStr[256]; | |
2897 sprintf(qPosStr, "%d", qPos); | |
2898 char pointNNStr[256]; | |
2899 sprintf(pointNNStr,"%d",pointNN); | |
2900 char trackNNStr[256]; | |
2901 sprintf(trackNNStr,"%d",trackNN); | |
2902 char seqLenStr[256]; | |
2903 sprintf(seqLenStr,"%d",seqLen); | |
2904 | |
2905 const char* argv[] ={ | |
2906 "./audioDB", | |
2907 COM_QUERY, | |
2908 queryType, // Need to pass a parameter | |
2909 COM_DATABASE, | |
2910 ENSURE_STRING(dbName), | |
2911 COM_FEATURES, | |
2912 ENSURE_STRING(qKey), | |
2913 COM_KEYLIST, | |
2914 ENSURE_STRING(keyList), | |
2915 COM_TIMES, | |
2916 ENSURE_STRING(timesFileName), | |
2917 COM_QPOINT, | |
2918 qPosStr, | |
2919 COM_POINTNN, | |
2920 pointNNStr, | |
2921 COM_TRACKNN, | |
2922 trackNNStr, // Need to pass a parameter | |
2923 COM_SEQLEN, | |
2924 seqLenStr | |
2925 }; | |
2926 | |
2927 const unsigned argc = 19; | |
2928 try { | |
2929 audioDB(argc, (char* const*)argv, &adbQueryResponse); | |
2930 return SOAP_OK; | |
2931 } catch (char *err) { | |
2932 soap_receiver_fault(soap, err, ""); | |
2933 return SOAP_FAULT; | |
2934 } | |
2935 } | |
2936 | |
2937 int adb__sequenceQuery(struct soap* soap, xsd__string dbName, xsd__string qKey, | |
2938 adb__sequenceQueryParms *parms, | |
2939 adb__queryResponse &adbQueryResponse) { | |
2940 | |
2941 char qPosStr[256]; | |
2942 char pointNNStr[256]; | |
2943 char trackNNStr[256]; | |
2944 char seqLenStr[256]; | |
2945 char relative_thresholdStr[256]; | |
2946 char absolute_thresholdStr[256]; | |
2947 | |
2948 /* When the branch is merged, move this to a header and use it | |
2949 elsewhere */ | |
2950 #define INTSTRINGIFY(val, str) \ | |
2951 snprintf(str, 256, "%d", val); | |
2952 #define DOUBLESTRINGIFY(val, str) \ | |
2953 snprintf(str, 256, "%f", val); | |
2954 | |
2955 INTSTRINGIFY(parms->qPos, qPosStr); | |
2956 INTSTRINGIFY(parms->pointNN, pointNNStr); | |
2957 INTSTRINGIFY(parms->segNN, trackNNStr); | |
2958 /* FIXME: decide which of segLen and seqLen should live */ | |
2959 INTSTRINGIFY(parms->segLen, seqLenStr); | |
2960 | |
2961 DOUBLESTRINGIFY(parms->relative_threshold, relative_thresholdStr); | |
2962 DOUBLESTRINGIFY(parms->absolute_threshold, absolute_thresholdStr); | |
2963 | |
2964 const char *argv[] = { | |
2965 "./audioDB", | |
2966 COM_QUERY, | |
2967 "sequence", | |
2968 COM_DATABASE, | |
2969 dbName, | |
2970 COM_FEATURES, | |
2971 qKey, | |
2972 COM_KEYLIST, | |
2973 /* FIXME: when this branch is merged, use ENSURE_STRING */ | |
2974 parms->keyList==0?"":parms->keyList, | |
2975 COM_TIMES, | |
2976 parms->timesFileName==0?"":parms->timesFileName, | |
2977 COM_QUERYPOWER, | |
2978 parms->powerFileName==0?"":parms->powerFileName, | |
2979 COM_QPOINT, | |
2980 qPosStr, | |
2981 COM_POINTNN, | |
2982 pointNNStr, | |
2983 COM_TRACKNN, | |
2984 trackNNStr, | |
2985 COM_SEQLEN, | |
2986 seqLenStr, | |
2987 COM_RELATIVE_THRESH, | |
2988 relative_thresholdStr, | |
2989 COM_ABSOLUTE_THRESH, | |
2990 absolute_thresholdStr | |
2991 }; | |
2992 | |
2993 const unsigned argc = 25; | |
2994 | |
2995 try { | |
2996 audioDB(argc, (char* const*)argv, &adbQueryResponse); | |
2997 return SOAP_OK; | |
2998 } catch (char *err) { | |
2999 soap_receiver_fault(soap, err, ""); | |
3000 return SOAP_FAULT; | |
3001 } | |
3002 } | 462 } |
3003 | 463 |
3004 int main(const unsigned argc, char* const argv[]){ | 464 int main(const unsigned argc, char* const argv[]){ |
3005 audioDB(argc, argv); | 465 audioDB(argc, argv); |
3006 } | 466 } |