Mercurial > hg > audiodb
comparison audioDB.cpp @ 116:531ce5162861 endian-neutral
Towards endian-neutrality, step 1.
dbH treatment is now endian-neutral: all on-disk and in-memory uint32_t
fields of dbH are in network byte order, and all reads and writes in
audioDB host code go through ntohl() and htonl() as appropriate.
author | mas01cr |
---|---|
date | Fri, 12 Oct 2007 11:20:35 +0000 |
parents | bc141fd1dc41 |
children | e800eac265c3 |
comparison
equal
deleted
inserted
replaced
115:97f4ff699d7c | 116:531ce5162861 |
---|---|
418 | 418 |
419 dbH = new dbTableHeaderT(); | 419 dbH = new dbTableHeaderT(); |
420 assert(dbH); | 420 assert(dbH); |
421 | 421 |
422 // Initialize header | 422 // Initialize header |
423 dbH->magic = O2_MAGIC; | 423 dbH->magic = htonl(O2_MAGIC); |
424 dbH->version = O2_FORMAT_VERSION; | 424 dbH->version = htonl(O2_FORMAT_VERSION); |
425 dbH->numFiles = 0; | 425 dbH->numFiles = 0; |
426 dbH->dim = 0; | 426 dbH->dim = 0; |
427 dbH->flags = 0; | 427 dbH->flags = 0; |
428 dbH->length = 0; | 428 dbH->length = 0; |
429 dbH->fileTableOffset = ALIGN_UP(O2_HEADERSIZE, 8); | 429 dbH->fileTableOffset = htonl(ALIGN_UP(O2_HEADERSIZE, 8)); |
430 dbH->trackTableOffset = ALIGN_UP(dbH->fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES, 8); | 430 dbH->trackTableOffset = htonl(ALIGN_UP(ntohl(dbH->fileTableOffset) + O2_FILETABLESIZE*O2_MAXFILES, 8)); |
431 dbH->dataOffset = ALIGN_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES, 8); | 431 dbH->dataOffset = htonl(ALIGN_UP(ntohl(dbH->trackTableOffset) + O2_TRACKTABLESIZE*O2_MAXFILES, 8)); |
432 dbH->l2normTableOffset = ALIGN_DOWN(O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8); | 432 dbH->l2normTableOffset = htonl(ALIGN_DOWN(O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8)); |
433 dbH->timesTableOffset = ALIGN_DOWN(dbH->l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8); | 433 dbH->timesTableOffset = htonl(ALIGN_DOWN(ntohl(dbH->l2normTableOffset) - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8)); |
434 | 434 |
435 memcpy (db, dbH, O2_HEADERSIZE); | 435 memcpy (db, dbH, O2_HEADERSIZE); |
436 if(verbosity) { | 436 if(verbosity) { |
437 cerr << COM_CREATE << " " << dbName << endl; | 437 cerr << COM_CREATE << " " << dbName << endl; |
438 } | 438 } |
439 } | 439 } |
440 | |
441 | 440 |
442 void audioDB::drop(){ | 441 void audioDB::drop(){ |
443 // FIXME: drop something? Should we even allow this? | 442 // FIXME: drop something? Should we even allow this? |
444 } | 443 } |
445 | 444 |
465 | 464 |
466 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) { | 465 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) { |
467 error("error reading db header", dbName, "read"); | 466 error("error reading db header", dbName, "read"); |
468 } | 467 } |
469 | 468 |
470 if(dbH->magic == O2_OLD_MAGIC) { | 469 if(ntohl(dbH->magic) == O2_OLD_MAGIC) { |
471 // FIXME: if anyone ever complains, write the program to convert | 470 // FIXME: if anyone ever complains, write the program to convert |
472 // from the old audioDB format to the new... | 471 // from the old audioDB format to the new... |
473 error("database file has old O2 header", dbName); | 472 error("database file has old O2 header", dbName); |
474 } | 473 } |
475 | 474 |
476 if(dbH->magic != O2_MAGIC) { | 475 if(ntohl(dbH->magic) != O2_MAGIC) { |
477 cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << endl; | 476 cerr << "expected: " << O2_MAGIC << ", got: " << ntohl(dbH->magic) << endl; |
478 error("database file has incorrect header", dbName); | 477 error("database file has incorrect header", dbName); |
479 } | 478 } |
480 | 479 |
481 if(dbH->version != O2_FORMAT_VERSION) { | 480 if(ntohl(dbH->version) != O2_FORMAT_VERSION) { |
482 error("database file has incorect version", dbName); | 481 error("database file has incorect version", dbName); |
483 } | 482 } |
484 | 483 |
485 if(inFile) | 484 if(inFile) { |
486 if(dbH->dim == 0 && dbH->length == 0) // empty database | 485 uint32_t inDim; |
487 // initialize with input dimensionality | 486 read(infid, &inDim, sizeof(uint32_t)); |
488 read(infid, &dbH->dim, sizeof(unsigned)); | 487 if(ntohl(dbH->dim) == 0 && ntohl(dbH->length) == 0) { |
489 else { | 488 // empty database: initialize with input dimensionality |
490 unsigned test; | 489 dbH->dim = htonl(inDim); |
491 read(infid, &test, sizeof(unsigned)); | 490 } else { |
492 if(dbH->dim != test) { | 491 if(dbH->dim != htonl(inDim)) { |
493 cerr << "error: expected dimension: " << dbH->dim << ", got : " << test <<endl; | 492 cerr << "error: expected dimension: " << ntohl(dbH->dim) << ", got : " << inDim << endl; |
494 error("feature dimensions do not match database table dimensions"); | 493 error("feature dimensions do not match database table dimensions"); |
495 } | 494 } |
496 } | 495 } |
496 } | |
497 | 497 |
498 // mmap the input file | 498 // mmap the input file |
499 if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) | 499 if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) |
500 == (caddr_t) -1) | 500 == (caddr_t) -1) |
501 error("mmap error for input", inFile, "mmap"); | 501 error("mmap error for input", inFile, "mmap"); |
504 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0), | 504 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0), |
505 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) | 505 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) |
506 error("mmap error for initting tables of database", "", "mmap"); | 506 error("mmap error for initting tables of database", "", "mmap"); |
507 | 507 |
508 // Make some handy tables with correct types | 508 // Make some handy tables with correct types |
509 fileTable= (char*)(db+dbH->fileTableOffset); | 509 fileTable = (char *)(db + ntohl(dbH->fileTableOffset)); |
510 trackTable = (unsigned*)(db+dbH->trackTableOffset); | 510 trackTable = (unsigned *)(db + ntohl(dbH->trackTableOffset)); |
511 dataBuf = (double*)(db+dbH->dataOffset); | 511 dataBuf = (double *)(db + ntohl(dbH->dataOffset)); |
512 l2normTable = (double*)(db+dbH->l2normTableOffset); | 512 l2normTable = (double *)(db + ntohl(dbH->l2normTableOffset)); |
513 timesTable = (double*)(db+dbH->timesTableOffset); | 513 timesTable = (double *)(db + ntohl(dbH->timesTableOffset)); |
514 } | 514 } |
515 | 515 |
516 void audioDB::insert(const char* dbName, const char* inFile){ | 516 void audioDB::insert(const char* dbName, const char* inFile){ |
517 | 517 |
518 initTables(dbName, 1, inFile); | 518 initTables(dbName, 1, inFile); |
519 | 519 |
520 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | 520 if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)) |
521 error("Must use timestamps with timestamped database","use --times"); | 521 error("Must use timestamps with timestamped database","use --times"); |
522 | 522 |
523 // Check that there is room for at least 1 more file | 523 // Check that there is room for at least 1 more file |
524 if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int))) | 524 if((char*)timesTable<((char*)dataBuf+ntohl(dbH->length)+statbuf.st_size-sizeof(int))) |
525 error("No more room in database","insert failed: reason database is full."); | 525 error("No more room in database","insert failed: reason database is full."); |
526 | 526 |
527 if(!key) | 527 if(!key) |
528 key=inFile; | 528 key=inFile; |
529 // Linear scan of filenames check for pre-existing feature | 529 // Linear scan of filenames check for pre-existing feature |
530 unsigned alreadyInserted=0; | 530 unsigned alreadyInserted=0; |
531 for(unsigned k=0; k<dbH->numFiles; k++) | 531 for(unsigned k=0; k<ntohl(dbH->numFiles); k++) |
532 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){ | 532 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){ |
533 alreadyInserted=1; | 533 alreadyInserted=1; |
534 break; | 534 break; |
535 } | 535 } |
536 | 536 |
540 } | 540 } |
541 return; | 541 return; |
542 } | 542 } |
543 | 543 |
544 // Make a track index table of features to file indexes | 544 // Make a track index table of features to file indexes |
545 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 545 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim)); |
546 if(!numVectors){ | 546 if(!numVectors){ |
547 if(verbosity) { | 547 if(verbosity) { |
548 cerr << "Warning: ignoring zero-length feature vector file:" << key << endl; | 548 cerr << "Warning: ignoring zero-length feature vector file:" << key << endl; |
549 } | 549 } |
550 // CLEAN UP | 550 // CLEAN UP |
552 munmap(db,O2_DEFAULTDBSIZE); | 552 munmap(db,O2_DEFAULTDBSIZE); |
553 close(infid); | 553 close(infid); |
554 return; | 554 return; |
555 } | 555 } |
556 | 556 |
557 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key)); | 557 strncpy(fileTable + ntohl(dbH->numFiles)*O2_FILETABLESIZE, key, strlen(key)); |
558 | 558 |
559 unsigned insertoffset = dbH->length;// Store current state | 559 unsigned insertoffset = ntohl(dbH->length);// Store current state |
560 | 560 |
561 // Check times status and insert times from file | 561 // Check times status and insert times from file |
562 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); | 562 unsigned timesoffset=insertoffset/(ntohl(dbH->dim)*sizeof(double)); |
563 double* timesdata=timesTable+timesoffset; | 563 double* timesdata=timesTable+timesoffset; |
564 assert(timesdata+numVectors<l2normTable); | 564 assert(timesdata+numVectors<l2normTable); |
565 insertTimeStamps(numVectors, timesFile, timesdata); | 565 insertTimeStamps(numVectors, timesFile, timesdata); |
566 | 566 |
567 // Increment file count | 567 // Increment file count |
568 dbH->numFiles++; | 568 dbH->numFiles = htonl(ntohl(dbH->numFiles) + 1); |
569 | 569 |
570 // Update Header information | 570 // Update Header information |
571 dbH->length+=(statbuf.st_size-sizeof(int)); | 571 dbH->length = htonl(ntohl(dbH->length) + (statbuf.st_size-sizeof(int))); |
572 | 572 |
573 // Copy the header back to the database | 573 // Copy the header back to the database |
574 memcpy (db, dbH, sizeof(dbTableHeaderT)); | 574 memcpy (db, dbH, sizeof(dbTableHeaderT)); |
575 | 575 |
576 // Update track to file index map | 576 // Update track to file index map |
577 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); | 577 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); |
578 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); | 578 //memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); |
579 *(trackTable + ntohl(dbH->numFiles) - 1) = numVectors; | |
579 | 580 |
580 // Update the feature database | 581 // Update the feature database |
581 memcpy (db+dbH->dataOffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); | 582 memcpy (db+ntohl(dbH->dataOffset)+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); |
582 | 583 |
583 // Norm the vectors on input if the database is already L2 normed | 584 // Norm the vectors on input if the database is already L2 normed |
584 if(dbH->flags & O2_FLAG_L2NORM) | 585 if(ntohl(dbH->flags) & O2_FLAG_L2NORM) |
585 unitNormAndInsertL2((double*)(db+dbH->dataOffset+insertoffset), dbH->dim, numVectors, 1); // append | 586 unitNormAndInsertL2((double*)(db+ntohl(dbH->dataOffset)+insertoffset), ntohl(dbH->dim), numVectors, 1); // append |
586 | 587 |
587 // Report status | 588 // Report status |
588 status(dbName); | 589 status(dbName); |
589 if(verbosity) { | 590 if(verbosity) { |
590 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors " | 591 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors " |
597 } | 598 } |
598 | 599 |
599 void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){ | 600 void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){ |
600 unsigned numtimes=0; | 601 unsigned numtimes=0; |
601 if(usingTimes){ | 602 if(usingTimes){ |
602 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) | 603 if(!(ntohl(dbH->flags) & O2_FLAG_TIMES) && !(ntohl(dbH->numFiles))) |
603 dbH->flags=dbH->flags|O2_FLAG_TIMES; | 604 dbH->flags = htonl(ntohl(dbH->flags) | O2_FLAG_TIMES); |
604 else if(!(dbH->flags&O2_FLAG_TIMES)){ | 605 else if(!(ntohl(dbH->flags) & O2_FLAG_TIMES)) { |
605 cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl; | 606 cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl; |
606 usingTimes=0; | 607 usingTimes=0; |
607 } | 608 } |
608 | 609 |
609 if(!timesFile->is_open()){ | 610 if(!timesFile->is_open()){ |
610 if(dbH->flags & O2_FLAG_TIMES){ | 611 if(ntohl(dbH->flags) & O2_FLAG_TIMES){ |
611 munmap(indata,statbuf.st_size); | 612 munmap(indata,statbuf.st_size); |
612 munmap(db,O2_DEFAULTDBSIZE); | 613 munmap(db,O2_DEFAULTDBSIZE); |
613 error("problem opening times file on timestamped database",timesFileName); | 614 error("problem opening times file on timestamped database",timesFileName); |
614 } | 615 } |
615 else{ | 616 else{ |
672 assert(dbH); | 673 assert(dbH); |
673 | 674 |
674 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) | 675 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) |
675 error("error reading db header"); | 676 error("error reading db header"); |
676 | 677 |
677 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | 678 if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)) |
678 error("Must use timestamps with timestamped database","use --times"); | 679 error("Must use timestamps with timestamped database","use --times"); |
679 | 680 |
680 if(dbH->magic!=O2_MAGIC){ | 681 if(ntohl(dbH->magic)!=O2_MAGIC){ |
681 cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl; | 682 cerr << "expected:" << O2_MAGIC << ", got:" << ntohl(dbH->magic) << endl; |
682 error("database file has incorrect header",dbName); | 683 error("database file has incorrect header",dbName); |
683 } | 684 } |
684 | 685 |
685 unsigned totalVectors=0; | 686 unsigned totalVectors=0; |
686 char *thisKey = new char[MAXSTR]; | 687 char *thisKey = new char[MAXSTR]; |
711 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, | 712 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, |
712 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) | 713 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) |
713 error("mmap error for batchinsert into database", "", "mmap"); | 714 error("mmap error for batchinsert into database", "", "mmap"); |
714 | 715 |
715 // Make some handy tables with correct types | 716 // Make some handy tables with correct types |
716 fileTable= (char*)(db+dbH->fileTableOffset); | 717 fileTable= (char*)(db + ntohl(dbH->fileTableOffset)); |
717 trackTable = (unsigned*)(db+dbH->trackTableOffset); | 718 trackTable = (unsigned*)(db + ntohl(dbH->trackTableOffset)); |
718 dataBuf = (double*)(db+dbH->dataOffset); | 719 dataBuf = (double*)(db + ntohl(dbH->dataOffset)); |
719 l2normTable = (double*)(db+dbH->l2normTableOffset); | 720 l2normTable = (double*)(db+ntohl(dbH->l2normTableOffset)); |
720 timesTable = (double*)(db+dbH->timesTableOffset); | 721 timesTable = (double*)(db+ntohl(dbH->timesTableOffset)); |
721 | 722 |
722 // Check that there is room for at least 1 more file | 723 // Check that there is room for at least 1 more file |
723 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) | 724 if((char*)timesTable<((char*)dataBuf+(ntohl(dbH->length)+statbuf.st_size-sizeof(int)))) |
724 error("No more room in database","insert failed: reason database is full."); | 725 error("No more room in database","insert failed: reason database is full."); |
725 | 726 |
726 if(thisFile) | 727 if(thisFile) { |
727 if(dbH->dim==0 && dbH->length==0) // empty database | 728 uint32_t thisDim; |
728 read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality | 729 read(infid,&thisDim,sizeof(uint32_t)); |
729 else { | 730 if(ntohl(dbH->dim) == 0 && ntohl(dbH->length)==0) { |
730 unsigned test; | 731 // empty database: initialize with input dimensionality |
731 read(infid,&test,sizeof(unsigned)); | 732 dbH->dim = htonl(thisDim); |
732 if(dbH->dim!=test){ | 733 } else { |
733 cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <<endl; | 734 if(ntohl(dbH->dim) != thisDim) { |
735 cerr << "error: expected dimension: " << ntohl(dbH->dim) << ", got :" << thisDim <<endl; | |
734 error("feature dimensions do not match database table dimensions"); | 736 error("feature dimensions do not match database table dimensions"); |
735 } | 737 } |
736 } | 738 } |
737 | 739 } |
738 // mmap the input file | 740 // mmap the input file |
739 if (thisFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) | 741 if (thisFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) |
740 == (caddr_t) -1) | 742 == (caddr_t) -1) |
741 error("mmap error for input", "", "mmap"); | 743 error("mmap error for input", "", "mmap"); |
742 | 744 |
743 | 745 |
744 // Linear scan of filenames check for pre-existing feature | 746 // Linear scan of filenames check for pre-existing feature |
745 unsigned alreadyInserted=0; | 747 unsigned alreadyInserted=0; |
746 | 748 |
747 for(unsigned k=0; k<dbH->numFiles; k++) | 749 for(unsigned k=0; k < ntohl(dbH->numFiles); k++) |
748 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){ | 750 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){ |
749 alreadyInserted=1; | 751 alreadyInserted=1; |
750 break; | 752 break; |
751 } | 753 } |
752 | 754 |
756 } | 758 } |
757 } | 759 } |
758 else{ | 760 else{ |
759 | 761 |
760 // Make a track index table of features to file indexes | 762 // Make a track index table of features to file indexes |
761 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 763 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim)); |
762 if(!numVectors){ | 764 if(!numVectors){ |
763 if(verbosity) { | 765 if(verbosity) { |
764 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; | 766 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; |
765 } | 767 } |
766 } | 768 } |
769 if(timesFile->eof()) | 771 if(timesFile->eof()) |
770 error("not enough timestamp files in timesList"); | 772 error("not enough timestamp files in timesList"); |
771 thisTimesFile=new ifstream(thisTimesFileName,ios::in); | 773 thisTimesFile=new ifstream(thisTimesFileName,ios::in); |
772 if(!thisTimesFile->is_open()) | 774 if(!thisTimesFile->is_open()) |
773 error("Cannot open timestamp file",thisTimesFileName); | 775 error("Cannot open timestamp file",thisTimesFileName); |
774 unsigned insertoffset=dbH->length; | 776 unsigned insertoffset= ntohl(dbH->length); |
775 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); | 777 unsigned timesoffset=insertoffset/(ntohl(dbH->dim)*sizeof(double)); |
776 double* timesdata=timesTable+timesoffset; | 778 double* timesdata=timesTable+timesoffset; |
777 assert(timesdata+numVectors<l2normTable); | 779 assert(timesdata+numVectors<l2normTable); |
778 insertTimeStamps(numVectors,thisTimesFile,timesdata); | 780 insertTimeStamps(numVectors,thisTimesFile,timesdata); |
779 if(thisTimesFile) | 781 if(thisTimesFile) |
780 delete thisTimesFile; | 782 delete thisTimesFile; |
781 } | 783 } |
782 | 784 |
783 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); | 785 strncpy(fileTable + ntohl(dbH->numFiles)*O2_FILETABLESIZE, thisKey, strlen(thisKey)); |
784 | 786 |
785 unsigned insertoffset = dbH->length;// Store current state | 787 unsigned insertoffset = ntohl(dbH->length);// Store current state |
786 | 788 |
787 // Increment file count | 789 // Increment file count |
788 dbH->numFiles++; | 790 dbH->numFiles = htonl(ntohl(dbH->numFiles) + 1); |
789 | 791 |
790 // Update Header information | 792 // Update Header information |
791 dbH->length+=(statbuf.st_size-sizeof(int)); | 793 dbH->length = htonl(ntohl(dbH->length) + (statbuf.st_size-sizeof(int))); |
792 // Copy the header back to the database | 794 // Copy the header back to the database |
793 memcpy (db, dbH, sizeof(dbTableHeaderT)); | 795 memcpy (db, dbH, sizeof(dbTableHeaderT)); |
794 | 796 |
795 // Update track to file index map | 797 // Update track to file index map |
796 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); | 798 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); |
797 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); | 799 //memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); |
800 *(trackTable + ntohl(dbH->numFiles) - 1) = numVectors; | |
798 | 801 |
799 // Update the feature database | 802 // Update the feature database |
800 memcpy (db+dbH->dataOffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); | 803 memcpy (db+ntohl(dbH->dataOffset)+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); |
801 | 804 |
802 // Norm the vectors on input if the database is already L2 normed | 805 // Norm the vectors on input if the database is already L2 normed |
803 if(dbH->flags & O2_FLAG_L2NORM) | 806 if(ntohl(dbH->flags) & O2_FLAG_L2NORM) |
804 unitNormAndInsertL2((double*)(db+dbH->dataOffset+insertoffset), dbH->dim, numVectors, 1); // append | 807 unitNormAndInsertL2((double*)(db+ntohl(dbH->dataOffset)+insertoffset), ntohl(dbH->dim), numVectors, 1); // append |
805 | 808 |
806 totalVectors+=numVectors; | 809 totalVectors+=numVectors; |
807 } | 810 } |
808 } | 811 } |
809 // CLEAN UP | 812 // CLEAN UP |
817 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) | 820 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) |
818 error("mmap error for creating database", "", "mmap"); | 821 error("mmap error for creating database", "", "mmap"); |
819 | 822 |
820 if(verbosity) { | 823 if(verbosity) { |
821 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " | 824 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " |
822 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl; | 825 << totalVectors*ntohl(dbH->dim)*sizeof(double) << " bytes." << endl; |
823 } | 826 } |
824 | 827 |
825 // Report status | 828 // Report status |
826 status(dbName); | 829 status(dbName); |
827 | 830 |
880 if(!dbH) | 883 if(!dbH) |
881 initTables(dbName, 0, 0); | 884 initTables(dbName, 0, 0); |
882 | 885 |
883 unsigned dudCount=0; | 886 unsigned dudCount=0; |
884 unsigned nullCount=0; | 887 unsigned nullCount=0; |
885 for(unsigned k=0; k<dbH->numFiles; k++){ | 888 for(unsigned k=0; k<ntohl(dbH->numFiles); k++){ |
886 if(trackTable[k]<sequenceLength){ | 889 if(trackTable[k]<sequenceLength){ |
887 dudCount++; | 890 dudCount++; |
888 if(!trackTable[k]) | 891 if(!trackTable[k]) |
889 nullCount++; | 892 nullCount++; |
890 } | 893 } |
891 } | 894 } |
892 | 895 |
893 if(adbStatusResult == 0) { | 896 if(adbStatusResult == 0) { |
894 | 897 |
895 // Update Header information | 898 // Update Header information |
896 cout << "num files:" << dbH->numFiles << endl; | 899 cout << "num files:" << ntohl(dbH->numFiles) << endl; |
897 cout << "data dim:" << dbH->dim <<endl; | 900 cout << "data dim:" << ntohl(dbH->dim) <<endl; |
898 if(dbH->dim>0){ | 901 if(ntohl(dbH->dim)>0){ |
899 cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl; | 902 cout << "total vectors:" << ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim))<<endl; |
900 cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << endl; | 903 cout << "vectors available:" << (ntohl(dbH->timesTableOffset)-(ntohl(dbH->dataOffset)+ntohl(dbH->length)))/(sizeof(double)*ntohl(dbH->dim)) << endl; |
901 } | 904 } |
902 cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; | 905 cout << "total bytes:" << ntohl(dbH->length) << " (" << (100.0*ntohl(dbH->length))/(ntohl(dbH->timesTableOffset)-ntohl(dbH->dataOffset)) << "%)" << endl; |
903 cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << | 906 cout << "bytes available:" << ntohl(dbH->timesTableOffset)-(ntohl(dbH->dataOffset)+ntohl(dbH->length)) << " (" << |
904 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; | 907 (100.0*(ntohl(dbH->timesTableOffset)-(ntohl(dbH->dataOffset)+ntohl(dbH->length))))/(ntohl(dbH->timesTableOffset)-ntohl(dbH->dataOffset)) << "%)" << endl; |
905 cout << "flags:" << dbH->flags << endl; | 908 cout << "flags:" << ntohl(dbH->flags) << endl; |
906 | 909 |
907 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; | 910 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; |
908 } else { | 911 } else { |
909 adbStatusResult->numFiles = dbH->numFiles; | 912 adbStatusResult->numFiles = ntohl(dbH->numFiles); |
910 adbStatusResult->dim = dbH->dim; | 913 adbStatusResult->dim = ntohl(dbH->dim); |
911 adbStatusResult->length = dbH->length; | 914 adbStatusResult->length = ntohl(dbH->length); |
912 adbStatusResult->dudCount = dudCount; | 915 adbStatusResult->dudCount = dudCount; |
913 adbStatusResult->nullCount = nullCount; | 916 adbStatusResult->nullCount = nullCount; |
914 adbStatusResult->flags = dbH->flags; | 917 adbStatusResult->flags = ntohl(dbH->flags); |
915 } | 918 } |
916 } | 919 } |
917 | 920 |
918 void audioDB::dump(const char* dbName){ | 921 void audioDB::dump(const char* dbName){ |
919 if(!dbH) | 922 if(!dbH) |
920 initTables(dbName, 0, 0); | 923 initTables(dbName, 0, 0); |
921 | 924 |
922 for(unsigned k=0, j=0; k<dbH->numFiles; k++){ | 925 for(unsigned k=0, j=0; k<ntohl(dbH->numFiles); k++){ |
923 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; | 926 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; |
924 j+=trackTable[k]; | 927 j+=trackTable[k]; |
925 } | 928 } |
926 | 929 |
927 status(dbName); | 930 status(dbName); |
928 } | 931 } |
929 | 932 |
930 void audioDB::l2norm(const char* dbName){ | 933 void audioDB::l2norm(const char* dbName){ |
931 initTables(dbName, true, 0); | 934 initTables(dbName, true, 0); |
932 if(dbH->length>0){ | 935 if(ntohl(dbH->length)>0){ |
933 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); | 936 unsigned numVectors = ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim)); |
934 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append | 937 unitNormAndInsertL2(dataBuf, ntohl(dbH->dim), numVectors, 0); // No append |
935 } | 938 } |
936 // Update database flags | 939 // Update database flags |
937 dbH->flags = dbH->flags|O2_FLAG_L2NORM; | 940 dbH->flags = htonl(ntohl(dbH->flags) | O2_FLAG_L2NORM); |
938 memcpy (db, dbH, O2_HEADERSIZE); | 941 memcpy (db, dbH, O2_HEADERSIZE); |
939 } | 942 } |
940 | 943 |
941 | 944 |
942 | 945 |
960 } | 963 } |
961 } | 964 } |
962 | 965 |
963 //return ordinal position of key in keyTable | 966 //return ordinal position of key in keyTable |
964 unsigned audioDB::getKeyPos(char* key){ | 967 unsigned audioDB::getKeyPos(char* key){ |
965 for(unsigned k=0; k<dbH->numFiles; k++) | 968 for(unsigned k=0; k< ntohl(dbH->numFiles); k++) |
966 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) | 969 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) |
967 return k; | 970 return k; |
968 error("Key not found",key); | 971 error("Key not found",key); |
969 return O2_ERR_KEYNOTFOUND; | 972 return O2_ERR_KEYNOTFOUND; |
970 } | 973 } |
974 | 977 |
975 initTables(dbName, 0, inFile); | 978 initTables(dbName, 0, inFile); |
976 | 979 |
977 // For each input vector, find the closest pointNN matching output vectors and report | 980 // For each input vector, find the closest pointNN matching output vectors and report |
978 // we use stdout in this stub version | 981 // we use stdout in this stub version |
979 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 982 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim)); |
980 | 983 |
981 double* query = (double*)(indata+sizeof(int)); | 984 double* query = (double*)(indata+sizeof(int)); |
982 double* data = dataBuf; | 985 double* data = dataBuf; |
983 double* queryCopy = 0; | 986 double* queryCopy = 0; |
984 | 987 |
985 if( dbH->flags & O2_FLAG_L2NORM ){ | 988 if(ntohl(dbH->flags) & O2_FLAG_L2NORM ){ |
986 // Make a copy of the query | 989 // Make a copy of the query |
987 queryCopy = new double[numVectors*dbH->dim]; | 990 queryCopy = new double[numVectors*ntohl(dbH->dim)]; |
988 qNorm = new double[numVectors]; | 991 qNorm = new double[numVectors]; |
989 assert(queryCopy&&qNorm); | 992 assert(queryCopy&&qNorm); |
990 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | 993 memcpy(queryCopy, query, numVectors*ntohl(dbH->dim)*sizeof(double)); |
991 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | 994 unitNorm(queryCopy, ntohl(dbH->dim), numVectors, qNorm); |
992 query = queryCopy; | 995 query = queryCopy; |
993 } | 996 } |
994 | 997 |
995 // Make temporary dynamic memory for results | 998 // Make temporary dynamic memory for results |
996 assert(pointNN>0 && pointNN<=O2_MAXNN); | 999 assert(pointNN>0 && pointNN<=O2_MAXNN); |
1005 | 1008 |
1006 unsigned j=numVectors; | 1009 unsigned j=numVectors; |
1007 unsigned k,l,n; | 1010 unsigned k,l,n; |
1008 double thisDist; | 1011 double thisDist; |
1009 | 1012 |
1010 unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double)); | 1013 unsigned totalVecs=ntohl(dbH->length)/(ntohl(dbH->dim)*sizeof(double)); |
1011 double meanQdur = 0; | 1014 double meanQdur = 0; |
1012 double* timesdata = 0; | 1015 double* timesdata = 0; |
1013 double* dbdurs = 0; | 1016 double* dbdurs = 0; |
1014 | 1017 |
1015 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | 1018 if(usingTimes && !(ntohl(dbH->flags) & O2_FLAG_TIMES)){ |
1016 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | 1019 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; |
1017 usingTimes=0; | 1020 usingTimes=0; |
1018 } | 1021 } |
1019 | 1022 |
1020 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | 1023 else if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)) |
1021 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | 1024 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; |
1022 | 1025 |
1023 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | 1026 else if(usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)){ |
1024 timesdata = new double[numVectors]; | 1027 timesdata = new double[numVectors]; |
1025 insertTimeStamps(numVectors, timesFile, timesdata); | 1028 insertTimeStamps(numVectors, timesFile, timesdata); |
1026 // Calculate durations of points | 1029 // Calculate durations of points |
1027 for(k=0; k<numVectors-1; k++){ | 1030 for(k=0; k<numVectors-1; k++){ |
1028 timesdata[k]=timesdata[k+1]-timesdata[k]; | 1031 timesdata[k]=timesdata[k+1]-timesdata[k]; |
1041 error("queryPoint > numVectors in query"); | 1044 error("queryPoint > numVectors in query"); |
1042 else{ | 1045 else{ |
1043 if(verbosity>1) { | 1046 if(verbosity>1) { |
1044 cerr << "query point: " << queryPoint << endl; cerr.flush(); | 1047 cerr << "query point: " << queryPoint << endl; cerr.flush(); |
1045 } | 1048 } |
1046 query=query+queryPoint*dbH->dim; | 1049 query=query+queryPoint*ntohl(dbH->dim); |
1047 numVectors=queryPoint+1; | 1050 numVectors=queryPoint+1; |
1048 j=1; | 1051 j=1; |
1049 } | 1052 } |
1050 | 1053 |
1051 gettimeofday(&tv1, NULL); | 1054 gettimeofday(&tv1, NULL); |
1052 while(j--){ // query | 1055 while(j--){ // query |
1053 data=dataBuf; | 1056 data=dataBuf; |
1054 k=totalVecs; // number of database vectors | 1057 k=totalVecs; // number of database vectors |
1055 while(k--){ // database | 1058 while(k--){ // database |
1056 thisDist=0; | 1059 thisDist=0; |
1057 l=dbH->dim; | 1060 l=ntohl(dbH->dim); |
1058 double* q=query; | 1061 double* q=query; |
1059 while(l--) | 1062 while(l--) |
1060 thisDist+=*q++**data++; | 1063 thisDist+=*q++**data++; |
1061 if(!usingTimes || | 1064 if(!usingTimes || |
1062 (usingTimes | 1065 (usingTimes |
1071 qIndexes[l]=qIndexes[l-1]; | 1074 qIndexes[l]=qIndexes[l-1]; |
1072 sIndexes[l]=sIndexes[l-1]; | 1075 sIndexes[l]=sIndexes[l-1]; |
1073 } | 1076 } |
1074 distances[n]=thisDist; | 1077 distances[n]=thisDist; |
1075 qIndexes[n]=numVectors-j-1; | 1078 qIndexes[n]=numVectors-j-1; |
1076 sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1; | 1079 sIndexes[n]=ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim))-k-1; |
1077 break; | 1080 break; |
1078 } | 1081 } |
1079 } | 1082 } |
1080 else | 1083 else |
1081 break; | 1084 break; |
1082 } | 1085 } |
1083 } | 1086 } |
1084 } | 1087 } |
1085 // Move query pointer to next query point | 1088 // Move query pointer to next query point |
1086 query+=dbH->dim; | 1089 query+=ntohl(dbH->dim); |
1087 } | 1090 } |
1088 | 1091 |
1089 gettimeofday(&tv2, NULL); | 1092 gettimeofday(&tv2, NULL); |
1090 if(verbosity>1) { | 1093 if(verbosity>1) { |
1091 cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; | 1094 cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; |
1095 // Output answer | 1098 // Output answer |
1096 // Loop over nearest neighbours | 1099 // Loop over nearest neighbours |
1097 for(k=0; k < pointNN; k++){ | 1100 for(k=0; k < pointNN; k++){ |
1098 // Scan for key | 1101 // Scan for key |
1099 unsigned cumTrack=0; | 1102 unsigned cumTrack=0; |
1100 for(l=0 ; l<dbH->numFiles; l++){ | 1103 for(l=0 ; l<ntohl(dbH->numFiles); l++){ |
1101 cumTrack+=trackTable[l]; | 1104 cumTrack+=trackTable[l]; |
1102 if(sIndexes[k]<cumTrack){ | 1105 if(sIndexes[k]<cumTrack){ |
1103 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " " | 1106 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " " |
1104 << sIndexes[k]+trackTable[l]-cumTrack << endl; | 1107 << sIndexes[k]+trackTable[l]-cumTrack << endl; |
1105 break; | 1108 break; |
1126 for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){ | 1129 for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){ |
1127 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; | 1130 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; |
1128 adbQueryResult->Dist[k]=distances[k]; | 1131 adbQueryResult->Dist[k]=distances[k]; |
1129 adbQueryResult->Qpos[k]=qIndexes[k]; | 1132 adbQueryResult->Qpos[k]=qIndexes[k]; |
1130 unsigned cumTrack=0; | 1133 unsigned cumTrack=0; |
1131 for(l=0 ; l<dbH->numFiles; l++){ | 1134 for(l=0 ; l<ntohl(dbH->numFiles); l++){ |
1132 cumTrack+=trackTable[l]; | 1135 cumTrack+=trackTable[l]; |
1133 if(sIndexes[k]<cumTrack){ | 1136 if(sIndexes[k]<cumTrack){ |
1134 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); | 1137 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); |
1135 break; | 1138 break; |
1136 } | 1139 } |
1155 // uses average of pointNN points per track | 1158 // uses average of pointNN points per track |
1156 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ | 1159 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ |
1157 initTables(dbName, 0, inFile); | 1160 initTables(dbName, 0, inFile); |
1158 | 1161 |
1159 // For each input vector, find the closest pointNN matching output vectors and report | 1162 // For each input vector, find the closest pointNN matching output vectors and report |
1160 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 1163 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim)); |
1161 double* query = (double*)(indata+sizeof(int)); | 1164 double* query = (double*)(indata+sizeof(int)); |
1162 double* data = dataBuf; | 1165 double* data = dataBuf; |
1163 double* queryCopy = 0; | 1166 double* queryCopy = 0; |
1164 | 1167 |
1165 if( dbH->flags & O2_FLAG_L2NORM ){ | 1168 if( ntohl(dbH->flags) & O2_FLAG_L2NORM ){ |
1166 // Make a copy of the query | 1169 // Make a copy of the query |
1167 queryCopy = new double[numVectors*dbH->dim]; | 1170 queryCopy = new double[numVectors*ntohl(dbH->dim)]; |
1168 qNorm = new double[numVectors]; | 1171 qNorm = new double[numVectors]; |
1169 assert(queryCopy&&qNorm); | 1172 assert(queryCopy&&qNorm); |
1170 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | 1173 memcpy(queryCopy, query, numVectors*ntohl(dbH->dim)*sizeof(double)); |
1171 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | 1174 unitNorm(queryCopy, ntohl(dbH->dim), numVectors, qNorm); |
1172 query = queryCopy; | 1175 query = queryCopy; |
1173 } | 1176 } |
1174 | 1177 |
1175 assert(pointNN>0 && pointNN<=O2_MAXNN); | 1178 assert(pointNN>0 && pointNN<=O2_MAXNN); |
1176 assert(trackNN>0 && trackNN<=O2_MAXNN); | 1179 assert(trackNN>0 && trackNN<=O2_MAXNN); |
1204 | 1207 |
1205 double meanQdur = 0; | 1208 double meanQdur = 0; |
1206 double* timesdata = 0; | 1209 double* timesdata = 0; |
1207 double* meanDBdur = 0; | 1210 double* meanDBdur = 0; |
1208 | 1211 |
1209 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | 1212 if(usingTimes && !(ntohl(dbH->flags) & O2_FLAG_TIMES)){ |
1210 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | 1213 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; |
1211 usingTimes=0; | 1214 usingTimes=0; |
1212 } | 1215 } |
1213 | 1216 |
1214 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | 1217 else if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)) |
1215 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | 1218 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; |
1216 | 1219 |
1217 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | 1220 else if(usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)){ |
1218 timesdata = new double[numVectors]; | 1221 timesdata = new double[numVectors]; |
1219 insertTimeStamps(numVectors, timesFile, timesdata); | 1222 insertTimeStamps(numVectors, timesFile, timesdata); |
1220 // Calculate durations of points | 1223 // Calculate durations of points |
1221 for(k=0; k<numVectors-1; k++){ | 1224 for(k=0; k<numVectors-1; k++){ |
1222 timesdata[k]=timesdata[k+1]-timesdata[k]; | 1225 timesdata[k]=timesdata[k+1]-timesdata[k]; |
1223 meanQdur+=timesdata[k]; | 1226 meanQdur+=timesdata[k]; |
1224 } | 1227 } |
1225 meanQdur/=k; | 1228 meanQdur/=k; |
1226 meanDBdur = new double[dbH->numFiles]; | 1229 meanDBdur = new double[ntohl(dbH->numFiles)]; |
1227 for(k=0; k<dbH->numFiles; k++){ | 1230 for(k=0; k<ntohl(dbH->numFiles); k++){ |
1228 meanDBdur[k]=0.0; | 1231 meanDBdur[k]=0.0; |
1229 for(j=0; j<trackTable[k]-1 ; j++) | 1232 for(j=0; j<trackTable[k]-1 ; j++) |
1230 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; | 1233 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; |
1231 meanDBdur[k]/=j; | 1234 meanDBdur[k]/=j; |
1232 } | 1235 } |
1237 error("queryPoint > numVectors in query"); | 1240 error("queryPoint > numVectors in query"); |
1238 else{ | 1241 else{ |
1239 if(verbosity>1) { | 1242 if(verbosity>1) { |
1240 cerr << "query point: " << queryPoint << endl; cerr.flush(); | 1243 cerr << "query point: " << queryPoint << endl; cerr.flush(); |
1241 } | 1244 } |
1242 query=query+queryPoint*dbH->dim; | 1245 query=query+queryPoint*ntohl(dbH->dim); |
1243 numVectors=queryPoint+1; | 1246 numVectors=queryPoint+1; |
1244 } | 1247 } |
1245 | 1248 |
1246 // build track offset table | 1249 // build track offset table |
1247 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; | 1250 unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)]; |
1248 unsigned cumTrack=0; | 1251 unsigned cumTrack=0; |
1249 unsigned trackIndexOffset; | 1252 unsigned trackIndexOffset; |
1250 for(k=0; k<dbH->numFiles;k++){ | 1253 for(k=0; k<ntohl(dbH->numFiles);k++){ |
1251 trackOffsetTable[k]=cumTrack; | 1254 trackOffsetTable[k]=cumTrack; |
1252 cumTrack+=trackTable[k]*dbH->dim; | 1255 cumTrack+=trackTable[k]*ntohl(dbH->dim); |
1253 } | 1256 } |
1254 | 1257 |
1255 char nextKey[MAXSTR]; | 1258 char nextKey[MAXSTR]; |
1256 | 1259 |
1257 gettimeofday(&tv1, NULL); | 1260 gettimeofday(&tv1, NULL); |
1258 | 1261 |
1259 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ | 1262 for(processedTracks=0, track=0 ; processedTracks < ntohl(dbH->numFiles) ; track++, processedTracks++){ |
1260 if(trackFile){ | 1263 if(trackFile){ |
1261 if(!trackFile->eof()){ | 1264 if(!trackFile->eof()){ |
1262 trackFile->getline(nextKey,MAXSTR); | 1265 trackFile->getline(nextKey,MAXSTR); |
1263 track=getKeyPos(nextKey); | 1266 track=getKeyPos(nextKey); |
1264 } | 1267 } |
1265 else | 1268 else |
1266 break; | 1269 break; |
1267 } | 1270 } |
1268 trackOffset=trackOffsetTable[track]; // numDoubles offset | 1271 trackOffset=trackOffsetTable[track]; // numDoubles offset |
1269 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset | 1272 trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset |
1270 if(verbosity>7) { | 1273 if(verbosity>7) { |
1271 cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush(); | 1274 cerr << track << "." << trackOffset/(ntohl(dbH->dim)) << "." << trackTable[track] << " | ";cerr.flush(); |
1272 } | 1275 } |
1273 | 1276 |
1274 if(dbH->flags & O2_FLAG_L2NORM) | 1277 if(ntohl(dbH->flags) & O2_FLAG_L2NORM) |
1275 usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy; | 1278 usingQueryPoint?query=queryCopy+queryPoint*ntohl(dbH->dim):query=queryCopy; |
1276 else | 1279 else |
1277 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); | 1280 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*ntohl(dbH->dim):query=(double*)(indata+sizeof(int)); |
1278 if(usingQueryPoint) | 1281 if(usingQueryPoint) |
1279 j=1; | 1282 j=1; |
1280 else | 1283 else |
1281 j=numVectors; | 1284 j=numVectors; |
1282 while(j--){ | 1285 while(j--){ |
1283 k=trackTable[track]; // number of vectors in track | 1286 k=trackTable[track]; // number of vectors in track |
1284 data=dataBuf+trackOffset; // data for track | 1287 data=dataBuf+trackOffset; // data for track |
1285 while(k--){ | 1288 while(k--){ |
1286 thisDist=0; | 1289 thisDist=0; |
1287 l=dbH->dim; | 1290 l=ntohl(dbH->dim); |
1288 double* q=query; | 1291 double* q=query; |
1289 while(l--) | 1292 while(l--) |
1290 thisDist+=*q++**data++; | 1293 thisDist+=*q++**data++; |
1291 if(!usingTimes || | 1294 if(!usingTimes || |
1292 (usingTimes | 1295 (usingTimes |
1311 break; | 1314 break; |
1312 } | 1315 } |
1313 } | 1316 } |
1314 } // track | 1317 } // track |
1315 // Move query pointer to next query point | 1318 // Move query pointer to next query point |
1316 query+=dbH->dim; | 1319 query+=ntohl(dbH->dim); |
1317 } // query | 1320 } // query |
1318 // Take the average of this track's distance | 1321 // Take the average of this track's distance |
1319 // Test the track distances | 1322 // Test the track distances |
1320 thisDist=0; | 1323 thisDist=0; |
1321 for (n = 0; n < pointNN; n++) { | 1324 for (n = 0; n < pointNN; n++) { |
1411 | 1414 |
1412 initTables(dbName, 0, inFile); | 1415 initTables(dbName, 0, inFile); |
1413 | 1416 |
1414 // For each input vector, find the closest pointNN matching output vectors and report | 1417 // For each input vector, find the closest pointNN matching output vectors and report |
1415 // we use stdout in this stub version | 1418 // we use stdout in this stub version |
1416 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 1419 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim)); |
1417 double* query = (double*)(indata+sizeof(int)); | 1420 double* query = (double*)(indata+sizeof(int)); |
1418 double* queryCopy = 0; | 1421 double* queryCopy = 0; |
1419 | 1422 |
1420 double qMeanL2; | 1423 double qMeanL2; |
1421 double* sMeanL2; | 1424 double* sMeanL2; |
1422 | 1425 |
1423 unsigned USE_THRESH=0; | 1426 unsigned USE_THRESH=0; |
1424 double SILENCE_THRESH=0; | 1427 double SILENCE_THRESH=0; |
1425 double DIFF_THRESH=0; | 1428 double DIFF_THRESH=0; |
1426 | 1429 |
1427 if(!(dbH->flags & O2_FLAG_L2NORM) ) | 1430 if(!(ntohl(dbH->flags) & O2_FLAG_L2NORM) ) |
1428 error("Database must be L2 normed for sequence query","use -L2NORM"); | 1431 error("Database must be L2 normed for sequence query","use -L2NORM"); |
1429 | 1432 |
1430 if(numVectors<sequenceLength) | 1433 if(numVectors<sequenceLength) |
1431 error("Query shorter than requested sequence length", "maybe use -l"); | 1434 error("Query shorter than requested sequence length", "maybe use -l"); |
1432 | 1435 |
1433 if(verbosity>1) { | 1436 if(verbosity>1) { |
1434 cerr << "performing norms ... "; cerr.flush(); | 1437 cerr << "performing norms ... "; cerr.flush(); |
1435 } | 1438 } |
1436 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); | 1439 unsigned dbVectors = ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim)); |
1437 | 1440 |
1438 // Make a copy of the query | 1441 // Make a copy of the query |
1439 queryCopy = new double[numVectors*dbH->dim]; | 1442 queryCopy = new double[numVectors*ntohl(dbH->dim)]; |
1440 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | 1443 memcpy(queryCopy, query, numVectors*ntohl(dbH->dim)*sizeof(double)); |
1441 qNorm = new double[numVectors]; | 1444 qNorm = new double[numVectors]; |
1442 sNorm = new double[dbVectors]; | 1445 sNorm = new double[dbVectors]; |
1443 sMeanL2=new double[dbH->numFiles]; | 1446 sMeanL2=new double[ntohl(dbH->numFiles)]; |
1444 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); | 1447 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); |
1445 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | 1448 unitNorm(queryCopy, ntohl(dbH->dim), numVectors, qNorm); |
1446 query = queryCopy; | 1449 query = queryCopy; |
1447 | 1450 |
1448 // Make norm measurements relative to sequenceLength | 1451 // Make norm measurements relative to sequenceLength |
1449 unsigned w = sequenceLength-1; | 1452 unsigned w = sequenceLength-1; |
1450 unsigned i,j; | 1453 unsigned i,j; |
1452 double tmp1,tmp2; | 1455 double tmp1,tmp2; |
1453 | 1456 |
1454 // Copy the L2 norm values to core to avoid disk random access later on | 1457 // Copy the L2 norm values to core to avoid disk random access later on |
1455 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); | 1458 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); |
1456 double* snPtr = sNorm; | 1459 double* snPtr = sNorm; |
1457 for(i=0; i<dbH->numFiles; i++){ | 1460 for(i=0; i<ntohl(dbH->numFiles); i++){ |
1458 if(trackTable[i]>=sequenceLength){ | 1461 if(trackTable[i]>=sequenceLength){ |
1459 tmp1=*snPtr; | 1462 tmp1=*snPtr; |
1460 j=1; | 1463 j=1; |
1461 w=sequenceLength-1; | 1464 w=sequenceLength-1; |
1462 while(w--) | 1465 while(w--) |
1478 } | 1481 } |
1479 snPtr+=trackTable[i]; | 1482 snPtr+=trackTable[i]; |
1480 } | 1483 } |
1481 | 1484 |
1482 double* pn = sMeanL2; | 1485 double* pn = sMeanL2; |
1483 w=dbH->numFiles; | 1486 w=ntohl(dbH->numFiles); |
1484 while(w--) | 1487 while(w--) |
1485 *pn++=0.0; | 1488 *pn++=0.0; |
1486 ps=sNorm; | 1489 ps=sNorm; |
1487 unsigned processedTracks=0; | 1490 unsigned processedTracks=0; |
1488 for(i=0; i<dbH->numFiles; i++){ | 1491 for(i=0; i<ntohl(dbH->numFiles); i++){ |
1489 if(trackTable[i]>sequenceLength-1){ | 1492 if(trackTable[i]>sequenceLength-1){ |
1490 w = trackTable[i]-sequenceLength+1; | 1493 w = trackTable[i]-sequenceLength+1; |
1491 pn = sMeanL2+i; | 1494 pn = sMeanL2+i; |
1492 *pn=0; | 1495 *pn=0; |
1493 while(w--) | 1496 while(w--) |
1573 // Timestamp and durations processing | 1576 // Timestamp and durations processing |
1574 double meanQdur = 0; | 1577 double meanQdur = 0; |
1575 double* timesdata = 0; | 1578 double* timesdata = 0; |
1576 double* meanDBdur = 0; | 1579 double* meanDBdur = 0; |
1577 | 1580 |
1578 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | 1581 if(usingTimes && !(ntohl(dbH->flags) & O2_FLAG_TIMES)){ |
1579 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | 1582 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; |
1580 usingTimes=0; | 1583 usingTimes=0; |
1581 } | 1584 } |
1582 | 1585 |
1583 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | 1586 else if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)) |
1584 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | 1587 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; |
1585 | 1588 |
1586 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | 1589 else if(usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)){ |
1587 timesdata = new double[numVectors]; | 1590 timesdata = new double[numVectors]; |
1588 assert(timesdata); | 1591 assert(timesdata); |
1589 insertTimeStamps(numVectors, timesFile, timesdata); | 1592 insertTimeStamps(numVectors, timesFile, timesdata); |
1590 // Calculate durations of points | 1593 // Calculate durations of points |
1591 for(k=0; k<numVectors-1; k++){ | 1594 for(k=0; k<numVectors-1; k++){ |
1594 } | 1597 } |
1595 meanQdur/=k; | 1598 meanQdur/=k; |
1596 if(verbosity>1) { | 1599 if(verbosity>1) { |
1597 cerr << "mean query file duration: " << meanQdur << endl; | 1600 cerr << "mean query file duration: " << meanQdur << endl; |
1598 } | 1601 } |
1599 meanDBdur = new double[dbH->numFiles]; | 1602 meanDBdur = new double[ntohl(dbH->numFiles)]; |
1600 assert(meanDBdur); | 1603 assert(meanDBdur); |
1601 for(k=0; k<dbH->numFiles; k++){ | 1604 for(k=0; k<ntohl(dbH->numFiles); k++){ |
1602 meanDBdur[k]=0.0; | 1605 meanDBdur[k]=0.0; |
1603 for(j=0; j<trackTable[k]-1 ; j++) | 1606 for(j=0; j<trackTable[k]-1 ; j++) |
1604 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; | 1607 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; |
1605 meanDBdur[k]/=j; | 1608 meanDBdur[k]/=j; |
1606 } | 1609 } |
1611 error("queryPoint > numVectors-wL+1 in query"); | 1614 error("queryPoint > numVectors-wL+1 in query"); |
1612 else{ | 1615 else{ |
1613 if(verbosity>1) { | 1616 if(verbosity>1) { |
1614 cerr << "query point: " << queryPoint << endl; cerr.flush(); | 1617 cerr << "query point: " << queryPoint << endl; cerr.flush(); |
1615 } | 1618 } |
1616 query=query+queryPoint*dbH->dim; | 1619 query=query+queryPoint*ntohl(dbH->dim); |
1617 qNorm=qNorm+queryPoint; | 1620 qNorm=qNorm+queryPoint; |
1618 numVectors=wL; | 1621 numVectors=wL; |
1619 } | 1622 } |
1620 | 1623 |
1621 double ** D = 0; // Differences query and target | 1624 double ** D = 0; // Differences query and target |
1633 double* qp; | 1636 double* qp; |
1634 double* sp; | 1637 double* sp; |
1635 double* dp; | 1638 double* dp; |
1636 | 1639 |
1637 // build track offset table | 1640 // build track offset table |
1638 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; | 1641 unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)]; |
1639 unsigned cumTrack=0; | 1642 unsigned cumTrack=0; |
1640 unsigned trackIndexOffset; | 1643 unsigned trackIndexOffset; |
1641 for(k=0; k<dbH->numFiles;k++){ | 1644 for(k=0; k<ntohl(dbH->numFiles);k++){ |
1642 trackOffsetTable[k]=cumTrack; | 1645 trackOffsetTable[k]=cumTrack; |
1643 cumTrack+=trackTable[k]*dbH->dim; | 1646 cumTrack+=trackTable[k]*ntohl(dbH->dim); |
1644 } | 1647 } |
1645 | 1648 |
1646 char nextKey [MAXSTR]; | 1649 char nextKey [MAXSTR]; |
1647 | 1650 |
1648 // chi^2 statistics | 1651 // chi^2 statistics |
1651 double logSampleSum = 0; | 1654 double logSampleSum = 0; |
1652 double minSample = 1e9; | 1655 double minSample = 1e9; |
1653 double maxSample = 0; | 1656 double maxSample = 0; |
1654 | 1657 |
1655 // Track loop | 1658 // Track loop |
1656 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ | 1659 for(processedTracks=0, track=0 ; processedTracks < ntohl(dbH->numFiles) ; track++, processedTracks++){ |
1657 | 1660 |
1658 // get trackID from file if using a control file | 1661 // get trackID from file if using a control file |
1659 if(trackFile){ | 1662 if(trackFile){ |
1660 if(!trackFile->eof()){ | 1663 if(!trackFile->eof()){ |
1661 trackFile->getline(nextKey,MAXSTR); | 1664 trackFile->getline(nextKey,MAXSTR); |
1664 else | 1667 else |
1665 break; | 1668 break; |
1666 } | 1669 } |
1667 | 1670 |
1668 trackOffset=trackOffsetTable[track]; // numDoubles offset | 1671 trackOffset=trackOffsetTable[track]; // numDoubles offset |
1669 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset | 1672 trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset |
1670 | 1673 |
1671 if(sequenceLength<=trackTable[track]){ // test for short sequences | 1674 if(sequenceLength<=trackTable[track]){ // test for short sequences |
1672 | 1675 |
1673 if(verbosity>7) { | 1676 if(verbosity>7) { |
1674 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); | 1677 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); |
1688 } | 1691 } |
1689 | 1692 |
1690 // Dot product | 1693 // Dot product |
1691 for(j=0; j<numVectors; j++) | 1694 for(j=0; j<numVectors; j++) |
1692 for(k=0; k<trackTable[track]; k++){ | 1695 for(k=0; k<trackTable[track]; k++){ |
1693 qp=query+j*dbH->dim; | 1696 qp=query+j*ntohl(dbH->dim); |
1694 sp=dataBuf+trackOffset+k*dbH->dim; | 1697 sp=dataBuf+trackOffset+k*ntohl(dbH->dim); |
1695 DD[j][k]=0.0; // Initialize matched filter array | 1698 DD[j][k]=0.0; // Initialize matched filter array |
1696 dp=&D[j][k]; // point to correlation cell j,k | 1699 dp=&D[j][k]; // point to correlation cell j,k |
1697 *dp=0.0; // initialize correlation cell | 1700 *dp=0.0; // initialize correlation cell |
1698 l=dbH->dim; // size of vectors | 1701 l=ntohl(dbH->dim); // size of vectors |
1699 while(l--) | 1702 while(l--) |
1700 *dp+=*qp++**sp++; | 1703 *dp+=*qp++**sp++; |
1701 } | 1704 } |
1702 | 1705 |
1703 // Matched Filter | 1706 // Matched Filter |
1913 | 1916 |
1914 initTables(dbName, 0, inFile); | 1917 initTables(dbName, 0, inFile); |
1915 | 1918 |
1916 // For each input vector, find the closest pointNN matching output vectors and report | 1919 // For each input vector, find the closest pointNN matching output vectors and report |
1917 // we use stdout in this stub version | 1920 // we use stdout in this stub version |
1918 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 1921 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim)); |
1919 double* query = (double*)(indata+sizeof(int)); | 1922 double* query = (double*)(indata+sizeof(int)); |
1920 double* queryCopy = 0; | 1923 double* queryCopy = 0; |
1921 | 1924 |
1922 double qMeanL2; | 1925 double qMeanL2; |
1923 double* sMeanL2; | 1926 double* sMeanL2; |
1924 | 1927 |
1925 unsigned USE_THRESH=0; | 1928 unsigned USE_THRESH=0; |
1926 double SILENCE_THRESH=0; | 1929 double SILENCE_THRESH=0; |
1927 double DIFF_THRESH=0; | 1930 double DIFF_THRESH=0; |
1928 | 1931 |
1929 if(!(dbH->flags & O2_FLAG_L2NORM) ) | 1932 if(!(ntohl(dbH->flags) & O2_FLAG_L2NORM) ) |
1930 error("Database must be L2 normed for sequence query","use -l2norm"); | 1933 error("Database must be L2 normed for sequence query","use -l2norm"); |
1931 | 1934 |
1932 if(verbosity>1) { | 1935 if(verbosity>1) { |
1933 cerr << "performing norms ... "; cerr.flush(); | 1936 cerr << "performing norms ... "; cerr.flush(); |
1934 } | 1937 } |
1935 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); | 1938 unsigned dbVectors = ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim)); |
1936 | 1939 |
1937 // Make a copy of the query | 1940 // Make a copy of the query |
1938 queryCopy = new double[numVectors*dbH->dim]; | 1941 queryCopy = new double[numVectors*ntohl(dbH->dim)]; |
1939 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); | 1942 memcpy(queryCopy, query, numVectors*ntohl(dbH->dim)*sizeof(double)); |
1940 qNorm = new double[numVectors]; | 1943 qNorm = new double[numVectors]; |
1941 sNorm = new double[dbVectors]; | 1944 sNorm = new double[dbVectors]; |
1942 sMeanL2=new double[dbH->numFiles]; | 1945 sMeanL2=new double[ntohl(dbH->numFiles)]; |
1943 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); | 1946 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); |
1944 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); | 1947 unitNorm(queryCopy, ntohl(dbH->dim), numVectors, qNorm); |
1945 query = queryCopy; | 1948 query = queryCopy; |
1946 | 1949 |
1947 // Make norm measurements relative to sequenceLength | 1950 // Make norm measurements relative to sequenceLength |
1948 unsigned w = sequenceLength-1; | 1951 unsigned w = sequenceLength-1; |
1949 unsigned i,j; | 1952 unsigned i,j; |
1951 double tmp1,tmp2; | 1954 double tmp1,tmp2; |
1952 | 1955 |
1953 // Copy the L2 norm values to core to avoid disk random access later on | 1956 // Copy the L2 norm values to core to avoid disk random access later on |
1954 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); | 1957 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); |
1955 double* snPtr = sNorm; | 1958 double* snPtr = sNorm; |
1956 for(i=0; i<dbH->numFiles; i++){ | 1959 for(i=0; i<ntohl(dbH->numFiles); i++){ |
1957 if(trackTable[i]>=sequenceLength){ | 1960 if(trackTable[i]>=sequenceLength){ |
1958 tmp1=*snPtr; | 1961 tmp1=*snPtr; |
1959 j=1; | 1962 j=1; |
1960 w=sequenceLength-1; | 1963 w=sequenceLength-1; |
1961 while(w--) | 1964 while(w--) |
1977 } | 1980 } |
1978 snPtr+=trackTable[i]; | 1981 snPtr+=trackTable[i]; |
1979 } | 1982 } |
1980 | 1983 |
1981 double* pn = sMeanL2; | 1984 double* pn = sMeanL2; |
1982 w=dbH->numFiles; | 1985 w=ntohl(dbH->numFiles); |
1983 while(w--) | 1986 while(w--) |
1984 *pn++=0.0; | 1987 *pn++=0.0; |
1985 ps=sNorm; | 1988 ps=sNorm; |
1986 unsigned processedTracks=0; | 1989 unsigned processedTracks=0; |
1987 for(i=0; i<dbH->numFiles; i++){ | 1990 for(i=0; i<ntohl(dbH->numFiles); i++){ |
1988 if(trackTable[i]>sequenceLength-1){ | 1991 if(trackTable[i]>sequenceLength-1){ |
1989 w = trackTable[i]-sequenceLength+1; | 1992 w = trackTable[i]-sequenceLength+1; |
1990 pn = sMeanL2+i; | 1993 pn = sMeanL2+i; |
1991 *pn=0; | 1994 *pn=0; |
1992 while(w--) | 1995 while(w--) |
2072 // Timestamp and durations processing | 2075 // Timestamp and durations processing |
2073 double meanQdur = 0; | 2076 double meanQdur = 0; |
2074 double* timesdata = 0; | 2077 double* timesdata = 0; |
2075 double* meanDBdur = 0; | 2078 double* meanDBdur = 0; |
2076 | 2079 |
2077 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ | 2080 if(usingTimes && !(ntohl(dbH->flags) & O2_FLAG_TIMES)){ |
2078 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; | 2081 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; |
2079 usingTimes=0; | 2082 usingTimes=0; |
2080 } | 2083 } |
2081 | 2084 |
2082 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | 2085 else if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)) |
2083 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; | 2086 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; |
2084 | 2087 |
2085 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ | 2088 else if(usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)){ |
2086 timesdata = new double[numVectors]; | 2089 timesdata = new double[numVectors]; |
2087 assert(timesdata); | 2090 assert(timesdata); |
2088 insertTimeStamps(numVectors, timesFile, timesdata); | 2091 insertTimeStamps(numVectors, timesFile, timesdata); |
2089 // Calculate durations of points | 2092 // Calculate durations of points |
2090 for(k=0; k<numVectors-1; k++){ | 2093 for(k=0; k<numVectors-1; k++){ |
2093 } | 2096 } |
2094 meanQdur/=k; | 2097 meanQdur/=k; |
2095 if(verbosity>1) { | 2098 if(verbosity>1) { |
2096 cerr << "mean query file duration: " << meanQdur << endl; | 2099 cerr << "mean query file duration: " << meanQdur << endl; |
2097 } | 2100 } |
2098 meanDBdur = new double[dbH->numFiles]; | 2101 meanDBdur = new double[ntohl(dbH->numFiles)]; |
2099 assert(meanDBdur); | 2102 assert(meanDBdur); |
2100 for(k=0; k<dbH->numFiles; k++){ | 2103 for(k=0; k<ntohl(dbH->numFiles); k++){ |
2101 meanDBdur[k]=0.0; | 2104 meanDBdur[k]=0.0; |
2102 for(j=0; j<trackTable[k]-1 ; j++) | 2105 for(j=0; j<trackTable[k]-1 ; j++) |
2103 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; | 2106 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; |
2104 meanDBdur[k]/=j; | 2107 meanDBdur[k]/=j; |
2105 } | 2108 } |
2110 error("queryPoint > numVectors-wL+1 in query"); | 2113 error("queryPoint > numVectors-wL+1 in query"); |
2111 else{ | 2114 else{ |
2112 if(verbosity>1) { | 2115 if(verbosity>1) { |
2113 cerr << "query point: " << queryPoint << endl; cerr.flush(); | 2116 cerr << "query point: " << queryPoint << endl; cerr.flush(); |
2114 } | 2117 } |
2115 query=query+queryPoint*dbH->dim; | 2118 query=query+queryPoint*ntohl(dbH->dim); |
2116 qNorm=qNorm+queryPoint; | 2119 qNorm=qNorm+queryPoint; |
2117 numVectors=wL; | 2120 numVectors=wL; |
2118 } | 2121 } |
2119 | 2122 |
2120 double ** D = 0; // Differences query and target | 2123 double ** D = 0; // Differences query and target |
2132 double* qp; | 2135 double* qp; |
2133 double* sp; | 2136 double* sp; |
2134 double* dp; | 2137 double* dp; |
2135 | 2138 |
2136 // build track offset table | 2139 // build track offset table |
2137 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; | 2140 unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)]; |
2138 unsigned cumTrack=0; | 2141 unsigned cumTrack=0; |
2139 unsigned trackIndexOffset; | 2142 unsigned trackIndexOffset; |
2140 for(k=0; k<dbH->numFiles;k++){ | 2143 for(k=0; k<ntohl(dbH->numFiles);k++){ |
2141 trackOffsetTable[k]=cumTrack; | 2144 trackOffsetTable[k]=cumTrack; |
2142 cumTrack+=trackTable[k]*dbH->dim; | 2145 cumTrack+=trackTable[k]*ntohl(dbH->dim); |
2143 } | 2146 } |
2144 | 2147 |
2145 char nextKey [MAXSTR]; | 2148 char nextKey [MAXSTR]; |
2146 | 2149 |
2147 // chi^2 statistics | 2150 // chi^2 statistics |
2150 double logSampleSum = 0; | 2153 double logSampleSum = 0; |
2151 double minSample = 1e9; | 2154 double minSample = 1e9; |
2152 double maxSample = 0; | 2155 double maxSample = 0; |
2153 | 2156 |
2154 // Track loop | 2157 // Track loop |
2155 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ | 2158 for(processedTracks=0, track=0 ; processedTracks < ntohl(dbH->numFiles) ; track++, processedTracks++){ |
2156 | 2159 |
2157 // get trackID from file if using a control file | 2160 // get trackID from file if using a control file |
2158 if(trackFile){ | 2161 if(trackFile){ |
2159 if(!trackFile->eof()){ | 2162 if(!trackFile->eof()){ |
2160 trackFile->getline(nextKey,MAXSTR); | 2163 trackFile->getline(nextKey,MAXSTR); |
2163 else | 2166 else |
2164 break; | 2167 break; |
2165 } | 2168 } |
2166 | 2169 |
2167 trackOffset=trackOffsetTable[track]; // numDoubles offset | 2170 trackOffset=trackOffsetTable[track]; // numDoubles offset |
2168 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset | 2171 trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset |
2169 | 2172 |
2170 if(sequenceLength<=trackTable[track]){ // test for short sequences | 2173 if(sequenceLength<=trackTable[track]){ // test for short sequences |
2171 | 2174 |
2172 if(verbosity>7) { | 2175 if(verbosity>7) { |
2173 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); | 2176 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); |
2187 } | 2190 } |
2188 | 2191 |
2189 // Dot product | 2192 // Dot product |
2190 for(j=0; j<numVectors; j++) | 2193 for(j=0; j<numVectors; j++) |
2191 for(k=0; k<trackTable[track]; k++){ | 2194 for(k=0; k<trackTable[track]; k++){ |
2192 qp=query+j*dbH->dim; | 2195 qp=query+j*ntohl(dbH->dim); |
2193 sp=dataBuf+trackOffset+k*dbH->dim; | 2196 sp=dataBuf+trackOffset+k*ntohl(dbH->dim); |
2194 DD[j][k]=0.0; // Initialize matched filter array | 2197 DD[j][k]=0.0; // Initialize matched filter array |
2195 dp=&D[j][k]; // point to correlation cell j,k | 2198 dp=&D[j][k]; // point to correlation cell j,k |
2196 *dp=0.0; // initialize correlation cell | 2199 *dp=0.0; // initialize correlation cell |
2197 l=dbH->dim; // size of vectors | 2200 l=ntohl(dbH->dim); // size of vectors |
2198 while(l--) | 2201 while(l--) |
2199 *dp+=*qp++**sp++; | 2202 *dp+=*qp++**sp++; |
2200 } | 2203 } |
2201 | 2204 |
2202 // Matched Filter | 2205 // Matched Filter |
2419 double *p; | 2422 double *p; |
2420 unsigned nn = n; | 2423 unsigned nn = n; |
2421 | 2424 |
2422 assert(l2normTable); | 2425 assert(l2normTable); |
2423 | 2426 |
2424 if( !append && (dbH->flags & O2_FLAG_L2NORM) ) | 2427 if( !append && (ntohl(dbH->flags) & O2_FLAG_L2NORM) ) |
2425 error("Database is already L2 normed", "automatic norm on insert is enabled"); | 2428 error("Database is already L2 normed", "automatic norm on insert is enabled"); |
2426 | 2429 |
2427 if(verbosity>2) { | 2430 if(verbosity>2) { |
2428 cerr << "norming " << n << " vectors...";cerr.flush(); | 2431 cerr << "norming " << n << " vectors...";cerr.flush(); |
2429 } | 2432 } |
2456 if(append) { | 2459 if(append) { |
2457 // FIXME: a hack, a very palpable hack: the vectors have already | 2460 // FIXME: a hack, a very palpable hack: the vectors have already |
2458 // been inserted, and dbH->length has already been updated. We | 2461 // been inserted, and dbH->length has already been updated. We |
2459 // need to subtract off again the number of vectors that we've | 2462 // need to subtract off again the number of vectors that we've |
2460 // inserted this time... | 2463 // inserted this time... |
2461 offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors | 2464 offset=(ntohl(dbH->length)/(ntohl(dbH->dim)*sizeof(double)))-n; // number of vectors |
2462 } else { | 2465 } else { |
2463 offset=0; | 2466 offset=0; |
2464 } | 2467 } |
2465 memcpy(l2normTable+offset, l2buf, n*sizeof(double)); | 2468 memcpy(l2normTable+offset, l2buf, n*sizeof(double)); |
2466 if(l2buf) | 2469 if(l2buf) |