comparison audioDB.cpp @ 116:531ce5162861 endian-neutral

Towards endian-neutrality, step 1. dbH treatment is now endian-neutral: all on-disk and in-memory uint32_t fields of dbH are in network byte order, and all reads and writes in audioDB host code go through ntohl() and htonl() as appropriate.
author mas01cr
date Fri, 12 Oct 2007 11:20:35 +0000
parents bc141fd1dc41
children e800eac265c3
115:97f4ff699d7c 116:531ce5162861
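
The convention described in the commit message can be illustrated with a minimal sketch (not code from this changeset): every uint32_t field of the header is kept in network byte order, both in the mmap'd file and in the in-memory copy, so host code wraps every read in ntohl() and every write in htonl(). The struct and helper names below are hypothetical stand-ins.

#include <arpa/inet.h>   // htonl(), ntohl()
#include <cstdint>

// Hypothetical stand-in for dbTableHeaderT: every uint32_t field is
// stored in network byte order, on disk and in the in-memory copy.
struct HeaderSketch {
  uint32_t magic;
  uint32_t numFiles;
};

// Read access: convert network -> host order.
static inline uint32_t get_u32(const uint32_t &stored) { return ntohl(stored); }

// Write access: convert host -> network order.
static inline void put_u32(uint32_t &stored, uint32_t value) { stored = htonl(value); }

// Read-modify-write, as insert() does for dbH->numFiles below.
static void bump_numFiles(HeaderSketch &h) {
  put_u32(h.numFiles, get_u32(h.numFiles) + 1);
}

On a big-endian host both conversions are no-ops, so the on-disk format is unchanged there; on a little-endian host each access byte-swaps.
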
418 418
419 dbH = new dbTableHeaderT(); 419 dbH = new dbTableHeaderT();
420 assert(dbH); 420 assert(dbH);
421 421
422 // Initialize header 422 // Initialize header
423 dbH->magic = O2_MAGIC; 423 dbH->magic = htonl(O2_MAGIC);
424 dbH->version = O2_FORMAT_VERSION; 424 dbH->version = htonl(O2_FORMAT_VERSION);
425 dbH->numFiles = 0; 425 dbH->numFiles = 0;
426 dbH->dim = 0; 426 dbH->dim = 0;
427 dbH->flags = 0; 427 dbH->flags = 0;
428 dbH->length = 0; 428 dbH->length = 0;
429 dbH->fileTableOffset = ALIGN_UP(O2_HEADERSIZE, 8); 429 dbH->fileTableOffset = htonl(ALIGN_UP(O2_HEADERSIZE, 8));
430 dbH->trackTableOffset = ALIGN_UP(dbH->fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES, 8); 430 dbH->trackTableOffset = htonl(ALIGN_UP(ntohl(dbH->fileTableOffset) + O2_FILETABLESIZE*O2_MAXFILES, 8));
431 dbH->dataOffset = ALIGN_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES, 8); 431 dbH->dataOffset = htonl(ALIGN_UP(ntohl(dbH->trackTableOffset) + O2_TRACKTABLESIZE*O2_MAXFILES, 8));
432 dbH->l2normTableOffset = ALIGN_DOWN(O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8); 432 dbH->l2normTableOffset = htonl(ALIGN_DOWN(O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8));
433 dbH->timesTableOffset = ALIGN_DOWN(dbH->l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8); 433 dbH->timesTableOffset = htonl(ALIGN_DOWN(ntohl(dbH->l2normTableOffset) - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8));
434 434
435 memcpy (db, dbH, O2_HEADERSIZE); 435 memcpy (db, dbH, O2_HEADERSIZE);
436 if(verbosity) { 436 if(verbosity) {
437 cerr << COM_CREATE << " " << dbName << endl; 437 cerr << COM_CREATE << " " << dbName << endl;
438 } 438 }
439 } 439 }
440
441 440
442 void audioDB::drop(){ 441 void audioDB::drop(){
443 // FIXME: drop something? Should we even allow this? 442 // FIXME: drop something? Should we even allow this?
444 } 443 }
445 444
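
The nested htonl(ALIGN_UP(ntohl(...))) calls in create() above derive each offset from the previously stored, network-order field. An equivalent way to read them, sketched below with placeholder constants and assuming ALIGN_UP rounds up to a multiple of its power-of-two second argument, is to compute all offsets in host order first and convert once when storing.

#include <arpa/inet.h>
#include <cstdint>

// Placeholder values standing in for O2_HEADERSIZE, O2_FILETABLESIZE and
// O2_MAXFILES; the real constants are defined elsewhere in audioDB.
static const uint32_t HEADERSIZE    = 4096;
static const uint32_t FILETABLESIZE = 256;
static const uint32_t MAXFILES      = 10000;

// Round x up to the next multiple of a (a must be a power of two).
static uint32_t align_up(uint32_t x, uint32_t a) { return (x + a - 1) & ~(a - 1); }

// Compute offsets in host order, then store them in network order.
void init_offsets(uint32_t &fileTableOffset_net, uint32_t &trackTableOffset_net) {
  uint32_t fileTableOffset  = align_up(HEADERSIZE, 8);
  uint32_t trackTableOffset = align_up(fileTableOffset + FILETABLESIZE * MAXFILES, 8);
  fileTableOffset_net  = htonl(fileTableOffset);
  trackTableOffset_net = htonl(trackTableOffset);
}
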
465 464
466 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) { 465 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) {
467 error("error reading db header", dbName, "read"); 466 error("error reading db header", dbName, "read");
468 } 467 }
469 468
470 if(dbH->magic == O2_OLD_MAGIC) { 469 if(ntohl(dbH->magic) == O2_OLD_MAGIC) {
471 // FIXME: if anyone ever complains, write the program to convert 470 // FIXME: if anyone ever complains, write the program to convert
472 // from the old audioDB format to the new... 471 // from the old audioDB format to the new...
473 error("database file has old O2 header", dbName); 472 error("database file has old O2 header", dbName);
474 } 473 }
475 474
476 if(dbH->magic != O2_MAGIC) { 475 if(ntohl(dbH->magic) != O2_MAGIC) {
477 cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << endl; 476 cerr << "expected: " << O2_MAGIC << ", got: " << ntohl(dbH->magic) << endl;
478 error("database file has incorrect header", dbName); 477 error("database file has incorrect header", dbName);
479 } 478 }
480 479
481 if(dbH->version != O2_FORMAT_VERSION) { 480 if(ntohl(dbH->version) != O2_FORMAT_VERSION) {
482 error("database file has incorect version", dbName); 481 error("database file has incorect version", dbName);
483 } 482 }
484 483
485 if(inFile) 484 if(inFile) {
486 if(dbH->dim == 0 && dbH->length == 0) // empty database 485 uint32_t inDim;
487 // initialize with input dimensionality 486 read(infid, &inDim, sizeof(uint32_t));
488 read(infid, &dbH->dim, sizeof(unsigned)); 487 if(ntohl(dbH->dim) == 0 && ntohl(dbH->length) == 0) {
489 else { 488 // empty database: initialize with input dimensionality
490 unsigned test; 489 dbH->dim = htonl(inDim);
491 read(infid, &test, sizeof(unsigned)); 490 } else {
492 if(dbH->dim != test) { 491 if(dbH->dim != htonl(inDim)) {
493 cerr << "error: expected dimension: " << dbH->dim << ", got : " << test <<endl; 492 cerr << "error: expected dimension: " << ntohl(dbH->dim) << ", got : " << inDim << endl;
494 error("feature dimensions do not match database table dimensions"); 493 error("feature dimensions do not match database table dimensions");
495 } 494 }
496 } 495 }
496 }
497 497
498 // mmap the input file 498 // mmap the input file
499 if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) 499 if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0))
500 == (caddr_t) -1) 500 == (caddr_t) -1)
501 error("mmap error for input", inFile, "mmap"); 501 error("mmap error for input", inFile, "mmap");
504 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0), 504 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0),
505 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) 505 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
506 error("mmap error for initting tables of database", "", "mmap"); 506 error("mmap error for initting tables of database", "", "mmap");
507 507
508 // Make some handy tables with correct types 508 // Make some handy tables with correct types
509 fileTable= (char*)(db+dbH->fileTableOffset); 509 fileTable = (char *)(db + ntohl(dbH->fileTableOffset));
510 trackTable = (unsigned*)(db+dbH->trackTableOffset); 510 trackTable = (unsigned *)(db + ntohl(dbH->trackTableOffset));
511 dataBuf = (double*)(db+dbH->dataOffset); 511 dataBuf = (double *)(db + ntohl(dbH->dataOffset));
512 l2normTable = (double*)(db+dbH->l2normTableOffset); 512 l2normTable = (double *)(db + ntohl(dbH->l2normTableOffset));
513 timesTable = (double*)(db+dbH->timesTableOffset); 513 timesTable = (double *)(db + ntohl(dbH->timesTableOffset));
514 } 514 }
515 515
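
The header checks in initTables() above mix two equivalent comparison styles: ntohl(dbH->magic) != O2_MAGIC converts the stored field to host order, while dbH->dim != htonl(inDim) converts the freshly read value to network order instead. Because the byte swap is a bijection on uint32_t, both forms agree; a small sketch, with hypothetical names:

#include <arpa/inet.h>
#include <cstdint>
#include <cassert>

// Both comparison styles give the same answer.
bool dims_match(uint32_t stored_dim_net, uint32_t input_dim_host) {
  bool host_side = (ntohl(stored_dim_net) == input_dim_host); // convert stored value
  bool net_side  = (stored_dim_net == htonl(input_dim_host)); // convert input value
  assert(host_side == net_side);                              // always holds
  return host_side;
}
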
516 void audioDB::insert(const char* dbName, const char* inFile){ 516 void audioDB::insert(const char* dbName, const char* inFile){
517 517
518 initTables(dbName, 1, inFile); 518 initTables(dbName, 1, inFile);
519 519
520 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 520 if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES))
521 error("Must use timestamps with timestamped database","use --times"); 521 error("Must use timestamps with timestamped database","use --times");
522 522
523 // Check that there is room for at least 1 more file 523 // Check that there is room for at least 1 more file
524 if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int))) 524 if((char*)timesTable<((char*)dataBuf+ntohl(dbH->length)+statbuf.st_size-sizeof(int)))
525 error("No more room in database","insert failed: reason database is full."); 525 error("No more room in database","insert failed: reason database is full.");
526 526
527 if(!key) 527 if(!key)
528 key=inFile; 528 key=inFile;
529 // Linear scan of filenames check for pre-existing feature 529 // Linear scan of filenames check for pre-existing feature
530 unsigned alreadyInserted=0; 530 unsigned alreadyInserted=0;
531 for(unsigned k=0; k<dbH->numFiles; k++) 531 for(unsigned k=0; k<ntohl(dbH->numFiles); k++)
532 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){ 532 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){
533 alreadyInserted=1; 533 alreadyInserted=1;
534 break; 534 break;
535 } 535 }
536 536
540 } 540 }
541 return; 541 return;
542 } 542 }
543 543
544 // Make a track index table of features to file indexes 544 // Make a track index table of features to file indexes
545 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); 545 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim));
546 if(!numVectors){ 546 if(!numVectors){
547 if(verbosity) { 547 if(verbosity) {
548 cerr << "Warning: ignoring zero-length feature vector file:" << key << endl; 548 cerr << "Warning: ignoring zero-length feature vector file:" << key << endl;
549 } 549 }
550 // CLEAN UP 550 // CLEAN UP
552 munmap(db,O2_DEFAULTDBSIZE); 552 munmap(db,O2_DEFAULTDBSIZE);
553 close(infid); 553 close(infid);
554 return; 554 return;
555 } 555 }
556 556
557 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key)); 557 strncpy(fileTable + ntohl(dbH->numFiles)*O2_FILETABLESIZE, key, strlen(key));
558 558
559 unsigned insertoffset = dbH->length;// Store current state 559 unsigned insertoffset = ntohl(dbH->length);// Store current state
560 560
561 // Check times status and insert times from file 561 // Check times status and insert times from file
562 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); 562 unsigned timesoffset=insertoffset/(ntohl(dbH->dim)*sizeof(double));
563 double* timesdata=timesTable+timesoffset; 563 double* timesdata=timesTable+timesoffset;
564 assert(timesdata+numVectors<l2normTable); 564 assert(timesdata+numVectors<l2normTable);
565 insertTimeStamps(numVectors, timesFile, timesdata); 565 insertTimeStamps(numVectors, timesFile, timesdata);
566 566
567 // Increment file count 567 // Increment file count
568 dbH->numFiles++; 568 dbH->numFiles = htonl(ntohl(dbH->numFiles) + 1);
569 569
570 // Update Header information 570 // Update Header information
571 dbH->length+=(statbuf.st_size-sizeof(int)); 571 dbH->length = htonl(ntohl(dbH->length) + (statbuf.st_size-sizeof(int)));
572 572
573 // Copy the header back to the database 573 // Copy the header back to the database
574 memcpy (db, dbH, sizeof(dbTableHeaderT)); 574 memcpy (db, dbH, sizeof(dbTableHeaderT));
575 575
576 // Update track to file index map 576 // Update track to file index map
577 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); 577 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned));
578 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); 578 //memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
579 *(trackTable + ntohl(dbH->numFiles) - 1) = numVectors;
579 580
580 // Update the feature database 581 // Update the feature database
581 memcpy (db+dbH->dataOffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); 582 memcpy (db+ntohl(dbH->dataOffset)+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int));
582 583
583 // Norm the vectors on input if the database is already L2 normed 584 // Norm the vectors on input if the database is already L2 normed
584 if(dbH->flags & O2_FLAG_L2NORM) 585 if(ntohl(dbH->flags) & O2_FLAG_L2NORM)
585 unitNormAndInsertL2((double*)(db+dbH->dataOffset+insertoffset), dbH->dim, numVectors, 1); // append 586 unitNormAndInsertL2((double*)(db+ntohl(dbH->dataOffset)+insertoffset), ntohl(dbH->dim), numVectors, 1); // append
586 587
587 // Report status 588 // Report status
588 status(dbName); 589 status(dbName);
589 if(verbosity) { 590 if(verbosity) {
590 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors " 591 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors "
597 } 598 }
598 599
599 void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){ 600 void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){
600 unsigned numtimes=0; 601 unsigned numtimes=0;
601 if(usingTimes){ 602 if(usingTimes){
602 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) 603 if(!(ntohl(dbH->flags) & O2_FLAG_TIMES) && !(ntohl(dbH->numFiles)))
603 dbH->flags=dbH->flags|O2_FLAG_TIMES; 604 dbH->flags = htonl(ntohl(dbH->flags) | O2_FLAG_TIMES);
604 else if(!(dbH->flags&O2_FLAG_TIMES)){ 605 else if(!(ntohl(dbH->flags) & O2_FLAG_TIMES)) {
605 cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl; 606 cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl;
606 usingTimes=0; 607 usingTimes=0;
607 } 608 }
608 609
609 if(!timesFile->is_open()){ 610 if(!timesFile->is_open()){
610 if(dbH->flags & O2_FLAG_TIMES){ 611 if(ntohl(dbH->flags) & O2_FLAG_TIMES){
611 munmap(indata,statbuf.st_size); 612 munmap(indata,statbuf.st_size);
612 munmap(db,O2_DEFAULTDBSIZE); 613 munmap(db,O2_DEFAULTDBSIZE);
613 error("problem opening times file on timestamped database",timesFileName); 614 error("problem opening times file on timestamped database",timesFileName);
614 } 615 }
615 else{ 616 else{
672 assert(dbH); 673 assert(dbH);
673 674
674 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) 675 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT))
675 error("error reading db header"); 676 error("error reading db header");
676 677
677 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 678 if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES))
678 error("Must use timestamps with timestamped database","use --times"); 679 error("Must use timestamps with timestamped database","use --times");
679 680
680 if(dbH->magic!=O2_MAGIC){ 681 if(ntohl(dbH->magic)!=O2_MAGIC){
681 cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl; 682 cerr << "expected:" << O2_MAGIC << ", got:" << ntohl(dbH->magic) << endl;
682 error("database file has incorrect header",dbName); 683 error("database file has incorrect header",dbName);
683 } 684 }
684 685
685 unsigned totalVectors=0; 686 unsigned totalVectors=0;
686 char *thisKey = new char[MAXSTR]; 687 char *thisKey = new char[MAXSTR];
711 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, 712 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
712 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) 713 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
713 error("mmap error for batchinsert into database", "", "mmap"); 714 error("mmap error for batchinsert into database", "", "mmap");
714 715
715 // Make some handy tables with correct types 716 // Make some handy tables with correct types
716 fileTable= (char*)(db+dbH->fileTableOffset); 717 fileTable= (char*)(db + ntohl(dbH->fileTableOffset));
717 trackTable = (unsigned*)(db+dbH->trackTableOffset); 718 trackTable = (unsigned*)(db + ntohl(dbH->trackTableOffset));
718 dataBuf = (double*)(db+dbH->dataOffset); 719 dataBuf = (double*)(db + ntohl(dbH->dataOffset));
719 l2normTable = (double*)(db+dbH->l2normTableOffset); 720 l2normTable = (double*)(db+ntohl(dbH->l2normTableOffset));
720 timesTable = (double*)(db+dbH->timesTableOffset); 721 timesTable = (double*)(db+ntohl(dbH->timesTableOffset));
721 722
722 // Check that there is room for at least 1 more file 723 // Check that there is room for at least 1 more file
723 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) 724 if((char*)timesTable<((char*)dataBuf+(ntohl(dbH->length)+statbuf.st_size-sizeof(int))))
724 error("No more room in database","insert failed: reason database is full."); 725 error("No more room in database","insert failed: reason database is full.");
725 726
726 if(thisFile) 727 if(thisFile) {
727 if(dbH->dim==0 && dbH->length==0) // empty database 728 uint32_t thisDim;
728 read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality 729 read(infid,&thisDim,sizeof(uint32_t));
729 else { 730 if(ntohl(dbH->dim) == 0 && ntohl(dbH->length)==0) {
730 unsigned test; 731 // empty database: initialize with input dimensionality
731 read(infid,&test,sizeof(unsigned)); 732 dbH->dim = htonl(thisDim);
732 if(dbH->dim!=test){ 733 } else {
733 cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <<endl; 734 if(ntohl(dbH->dim) != thisDim) {
735 cerr << "error: expected dimension: " << ntohl(dbH->dim) << ", got :" << thisDim <<endl;
734 error("feature dimensions do not match database table dimensions"); 736 error("feature dimensions do not match database table dimensions");
735 } 737 }
736 } 738 }
737 739 }
738 // mmap the input file 740 // mmap the input file
739 if (thisFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) 741 if (thisFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0))
740 == (caddr_t) -1) 742 == (caddr_t) -1)
741 error("mmap error for input", "", "mmap"); 743 error("mmap error for input", "", "mmap");
742 744
743 745
744 // Linear scan of filenames check for pre-existing feature 746 // Linear scan of filenames check for pre-existing feature
745 unsigned alreadyInserted=0; 747 unsigned alreadyInserted=0;
746 748
747 for(unsigned k=0; k<dbH->numFiles; k++) 749 for(unsigned k=0; k < ntohl(dbH->numFiles); k++)
748 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){ 750 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){
749 alreadyInserted=1; 751 alreadyInserted=1;
750 break; 752 break;
751 } 753 }
752 754
756 } 758 }
757 } 759 }
758 else{ 760 else{
759 761
760 // Make a track index table of features to file indexes 762 // Make a track index table of features to file indexes
761 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); 763 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim));
762 if(!numVectors){ 764 if(!numVectors){
763 if(verbosity) { 765 if(verbosity) {
764 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; 766 cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl;
765 } 767 }
766 } 768 }
769 if(timesFile->eof()) 771 if(timesFile->eof())
770 error("not enough timestamp files in timesList"); 772 error("not enough timestamp files in timesList");
771 thisTimesFile=new ifstream(thisTimesFileName,ios::in); 773 thisTimesFile=new ifstream(thisTimesFileName,ios::in);
772 if(!thisTimesFile->is_open()) 774 if(!thisTimesFile->is_open())
773 error("Cannot open timestamp file",thisTimesFileName); 775 error("Cannot open timestamp file",thisTimesFileName);
774 unsigned insertoffset=dbH->length; 776 unsigned insertoffset= ntohl(dbH->length);
775 unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); 777 unsigned timesoffset=insertoffset/(ntohl(dbH->dim)*sizeof(double));
776 double* timesdata=timesTable+timesoffset; 778 double* timesdata=timesTable+timesoffset;
777 assert(timesdata+numVectors<l2normTable); 779 assert(timesdata+numVectors<l2normTable);
778 insertTimeStamps(numVectors,thisTimesFile,timesdata); 780 insertTimeStamps(numVectors,thisTimesFile,timesdata);
779 if(thisTimesFile) 781 if(thisTimesFile)
780 delete thisTimesFile; 782 delete thisTimesFile;
781 } 783 }
782 784
783 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); 785 strncpy(fileTable + ntohl(dbH->numFiles)*O2_FILETABLESIZE, thisKey, strlen(thisKey));
784 786
785 unsigned insertoffset = dbH->length;// Store current state 787 unsigned insertoffset = ntohl(dbH->length);// Store current state
786 788
787 // Increment file count 789 // Increment file count
788 dbH->numFiles++; 790 dbH->numFiles = htonl(ntohl(dbH->numFiles) + 1);
789 791
790 // Update Header information 792 // Update Header information
791 dbH->length+=(statbuf.st_size-sizeof(int)); 793 dbH->length = htonl(ntohl(dbH->length) + (statbuf.st_size-sizeof(int)));
792 // Copy the header back to the database 794 // Copy the header back to the database
793 memcpy (db, dbH, sizeof(dbTableHeaderT)); 795 memcpy (db, dbH, sizeof(dbTableHeaderT));
794 796
795 // Update track to file index map 797 // Update track to file index map
796 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); 798 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned));
797 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); 799 //memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
800 *(trackTable + ntohl(dbH->numFiles) - 1) = numVectors;
798 801
799 // Update the feature database 802 // Update the feature database
800 memcpy (db+dbH->dataOffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); 803 memcpy (db+ntohl(dbH->dataOffset)+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int));
801 804
802 // Norm the vectors on input if the database is already L2 normed 805 // Norm the vectors on input if the database is already L2 normed
803 if(dbH->flags & O2_FLAG_L2NORM) 806 if(ntohl(dbH->flags) & O2_FLAG_L2NORM)
804 unitNormAndInsertL2((double*)(db+dbH->dataOffset+insertoffset), dbH->dim, numVectors, 1); // append 807 unitNormAndInsertL2((double*)(db+ntohl(dbH->dataOffset)+insertoffset), ntohl(dbH->dim), numVectors, 1); // append
805 808
806 totalVectors+=numVectors; 809 totalVectors+=numVectors;
807 } 810 }
808 } 811 }
809 // CLEAN UP 812 // CLEAN UP
817 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) 820 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
818 error("mmap error for creating database", "", "mmap"); 821 error("mmap error for creating database", "", "mmap");
819 822
820 if(verbosity) { 823 if(verbosity) {
821 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " 824 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors "
822 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl; 825 << totalVectors*ntohl(dbH->dim)*sizeof(double) << " bytes." << endl;
823 } 826 }
824 827
825 // Report status 828 // Report status
826 status(dbName); 829 status(dbName);
827 830
880 if(!dbH) 883 if(!dbH)
881 initTables(dbName, 0, 0); 884 initTables(dbName, 0, 0);
882 885
883 unsigned dudCount=0; 886 unsigned dudCount=0;
884 unsigned nullCount=0; 887 unsigned nullCount=0;
885 for(unsigned k=0; k<dbH->numFiles; k++){ 888 for(unsigned k=0; k<ntohl(dbH->numFiles); k++){
886 if(trackTable[k]<sequenceLength){ 889 if(trackTable[k]<sequenceLength){
887 dudCount++; 890 dudCount++;
888 if(!trackTable[k]) 891 if(!trackTable[k])
889 nullCount++; 892 nullCount++;
890 } 893 }
891 } 894 }
892 895
893 if(adbStatusResult == 0) { 896 if(adbStatusResult == 0) {
894 897
895 // Update Header information 898 // Update Header information
896 cout << "num files:" << dbH->numFiles << endl; 899 cout << "num files:" << ntohl(dbH->numFiles) << endl;
897 cout << "data dim:" << dbH->dim <<endl; 900 cout << "data dim:" << ntohl(dbH->dim) <<endl;
898 if(dbH->dim>0){ 901 if(ntohl(dbH->dim)>0){
899 cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl; 902 cout << "total vectors:" << ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim))<<endl;
900 cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << endl; 903 cout << "vectors available:" << (ntohl(dbH->timesTableOffset)-(ntohl(dbH->dataOffset)+ntohl(dbH->length)))/(sizeof(double)*ntohl(dbH->dim)) << endl;
901 } 904 }
902 cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; 905 cout << "total bytes:" << ntohl(dbH->length) << " (" << (100.0*ntohl(dbH->length))/(ntohl(dbH->timesTableOffset)-ntohl(dbH->dataOffset)) << "%)" << endl;
903 cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << 906 cout << "bytes available:" << ntohl(dbH->timesTableOffset)-(ntohl(dbH->dataOffset)+ntohl(dbH->length)) << " (" <<
904 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; 907 (100.0*(ntohl(dbH->timesTableOffset)-(ntohl(dbH->dataOffset)+ntohl(dbH->length))))/(ntohl(dbH->timesTableOffset)-ntohl(dbH->dataOffset)) << "%)" << endl;
905 cout << "flags:" << dbH->flags << endl; 908 cout << "flags:" << ntohl(dbH->flags) << endl;
906 909
907 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; 910 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl;
908 } else { 911 } else {
909 adbStatusResult->numFiles = dbH->numFiles; 912 adbStatusResult->numFiles = ntohl(dbH->numFiles);
910 adbStatusResult->dim = dbH->dim; 913 adbStatusResult->dim = ntohl(dbH->dim);
911 adbStatusResult->length = dbH->length; 914 adbStatusResult->length = ntohl(dbH->length);
912 adbStatusResult->dudCount = dudCount; 915 adbStatusResult->dudCount = dudCount;
913 adbStatusResult->nullCount = nullCount; 916 adbStatusResult->nullCount = nullCount;
914 adbStatusResult->flags = dbH->flags; 917 adbStatusResult->flags = ntohl(dbH->flags);
915 } 918 }
916 } 919 }
917 920
918 void audioDB::dump(const char* dbName){ 921 void audioDB::dump(const char* dbName){
919 if(!dbH) 922 if(!dbH)
920 initTables(dbName, 0, 0); 923 initTables(dbName, 0, 0);
921 924
922 for(unsigned k=0, j=0; k<dbH->numFiles; k++){ 925 for(unsigned k=0, j=0; k<ntohl(dbH->numFiles); k++){
923 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; 926 cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl;
924 j+=trackTable[k]; 927 j+=trackTable[k];
925 } 928 }
926 929
927 status(dbName); 930 status(dbName);
928 } 931 }
929 932
930 void audioDB::l2norm(const char* dbName){ 933 void audioDB::l2norm(const char* dbName){
931 initTables(dbName, true, 0); 934 initTables(dbName, true, 0);
932 if(dbH->length>0){ 935 if(ntohl(dbH->length)>0){
933 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); 936 unsigned numVectors = ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim));
934 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append 937 unitNormAndInsertL2(dataBuf, ntohl(dbH->dim), numVectors, 0); // No append
935 } 938 }
936 // Update database flags 939 // Update database flags
937 dbH->flags = dbH->flags|O2_FLAG_L2NORM; 940 dbH->flags = htonl(ntohl(dbH->flags) | O2_FLAG_L2NORM);
938 memcpy (db, dbH, O2_HEADERSIZE); 941 memcpy (db, dbH, O2_HEADERSIZE);
939 } 942 }
940 943
941 944
942 945
960 } 963 }
961 } 964 }
962 965
963 //return ordinal position of key in keyTable 966 //return ordinal position of key in keyTable
964 unsigned audioDB::getKeyPos(char* key){ 967 unsigned audioDB::getKeyPos(char* key){
965 for(unsigned k=0; k<dbH->numFiles; k++) 968 for(unsigned k=0; k< ntohl(dbH->numFiles); k++)
966 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) 969 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0)
967 return k; 970 return k;
968 error("Key not found",key); 971 error("Key not found",key);
969 return O2_ERR_KEYNOTFOUND; 972 return O2_ERR_KEYNOTFOUND;
970 } 973 }
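
In the query routines that follow, ntohl(dbH->dim) and ntohl(dbH->numFiles) are re-evaluated on each access, including inside tight inner loops. A common alternative, sketched here with a minimal stand-in struct and not part of this changeset, is to hoist the converted values into host-order locals once per call:

#include <arpa/inet.h>
#include <cstdint>

struct HeaderSketch { uint32_t dim; uint32_t numFiles; };  // fields in network order

void scan_tracks(const HeaderSketch *dbH, const unsigned *trackTable) {
  const uint32_t dim      = ntohl(dbH->dim);       // convert once per call
  const uint32_t numFiles = ntohl(dbH->numFiles);  // convert once per call
  for (uint32_t k = 0; k < numFiles; k++) {
    (void)dim;           // inner loops would use the host-order 'dim' here
    (void)trackTable[k]; // e.g. cumTrack += trackTable[k] * dim;
  }
}
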
974 977
975 initTables(dbName, 0, inFile); 978 initTables(dbName, 0, inFile);
976 979
977 // For each input vector, find the closest pointNN matching output vectors and report 980 // For each input vector, find the closest pointNN matching output vectors and report
978 // we use stdout in this stub version 981 // we use stdout in this stub version
979 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); 982 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim));
980 983
981 double* query = (double*)(indata+sizeof(int)); 984 double* query = (double*)(indata+sizeof(int));
982 double* data = dataBuf; 985 double* data = dataBuf;
983 double* queryCopy = 0; 986 double* queryCopy = 0;
984 987
985 if( dbH->flags & O2_FLAG_L2NORM ){ 988 if(ntohl(dbH->flags) & O2_FLAG_L2NORM ){
986 // Make a copy of the query 989 // Make a copy of the query
987 queryCopy = new double[numVectors*dbH->dim]; 990 queryCopy = new double[numVectors*ntohl(dbH->dim)];
988 qNorm = new double[numVectors]; 991 qNorm = new double[numVectors];
989 assert(queryCopy&&qNorm); 992 assert(queryCopy&&qNorm);
990 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); 993 memcpy(queryCopy, query, numVectors*ntohl(dbH->dim)*sizeof(double));
991 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); 994 unitNorm(queryCopy, ntohl(dbH->dim), numVectors, qNorm);
992 query = queryCopy; 995 query = queryCopy;
993 } 996 }
994 997
995 // Make temporary dynamic memory for results 998 // Make temporary dynamic memory for results
996 assert(pointNN>0 && pointNN<=O2_MAXNN); 999 assert(pointNN>0 && pointNN<=O2_MAXNN);
1005 1008
1006 unsigned j=numVectors; 1009 unsigned j=numVectors;
1007 unsigned k,l,n; 1010 unsigned k,l,n;
1008 double thisDist; 1011 double thisDist;
1009 1012
1010 unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double)); 1013 unsigned totalVecs=ntohl(dbH->length)/(ntohl(dbH->dim)*sizeof(double));
1011 double meanQdur = 0; 1014 double meanQdur = 0;
1012 double* timesdata = 0; 1015 double* timesdata = 0;
1013 double* dbdurs = 0; 1016 double* dbdurs = 0;
1014 1017
1015 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ 1018 if(usingTimes && !(ntohl(dbH->flags) & O2_FLAG_TIMES)){
1016 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; 1019 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
1017 usingTimes=0; 1020 usingTimes=0;
1018 } 1021 }
1019 1022
1020 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 1023 else if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES))
1021 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; 1024 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
1022 1025
1023 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ 1026 else if(usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)){
1024 timesdata = new double[numVectors]; 1027 timesdata = new double[numVectors];
1025 insertTimeStamps(numVectors, timesFile, timesdata); 1028 insertTimeStamps(numVectors, timesFile, timesdata);
1026 // Calculate durations of points 1029 // Calculate durations of points
1027 for(k=0; k<numVectors-1; k++){ 1030 for(k=0; k<numVectors-1; k++){
1028 timesdata[k]=timesdata[k+1]-timesdata[k]; 1031 timesdata[k]=timesdata[k+1]-timesdata[k];
1041 error("queryPoint > numVectors in query"); 1044 error("queryPoint > numVectors in query");
1042 else{ 1045 else{
1043 if(verbosity>1) { 1046 if(verbosity>1) {
1044 cerr << "query point: " << queryPoint << endl; cerr.flush(); 1047 cerr << "query point: " << queryPoint << endl; cerr.flush();
1045 } 1048 }
1046 query=query+queryPoint*dbH->dim; 1049 query=query+queryPoint*ntohl(dbH->dim);
1047 numVectors=queryPoint+1; 1050 numVectors=queryPoint+1;
1048 j=1; 1051 j=1;
1049 } 1052 }
1050 1053
1051 gettimeofday(&tv1, NULL); 1054 gettimeofday(&tv1, NULL);
1052 while(j--){ // query 1055 while(j--){ // query
1053 data=dataBuf; 1056 data=dataBuf;
1054 k=totalVecs; // number of database vectors 1057 k=totalVecs; // number of database vectors
1055 while(k--){ // database 1058 while(k--){ // database
1056 thisDist=0; 1059 thisDist=0;
1057 l=dbH->dim; 1060 l=ntohl(dbH->dim);
1058 double* q=query; 1061 double* q=query;
1059 while(l--) 1062 while(l--)
1060 thisDist+=*q++**data++; 1063 thisDist+=*q++**data++;
1061 if(!usingTimes || 1064 if(!usingTimes ||
1062 (usingTimes 1065 (usingTimes
1071 qIndexes[l]=qIndexes[l-1]; 1074 qIndexes[l]=qIndexes[l-1];
1072 sIndexes[l]=sIndexes[l-1]; 1075 sIndexes[l]=sIndexes[l-1];
1073 } 1076 }
1074 distances[n]=thisDist; 1077 distances[n]=thisDist;
1075 qIndexes[n]=numVectors-j-1; 1078 qIndexes[n]=numVectors-j-1;
1076 sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1; 1079 sIndexes[n]=ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim))-k-1;
1077 break; 1080 break;
1078 } 1081 }
1079 } 1082 }
1080 else 1083 else
1081 break; 1084 break;
1082 } 1085 }
1083 } 1086 }
1084 } 1087 }
1085 // Move query pointer to next query point 1088 // Move query pointer to next query point
1086 query+=dbH->dim; 1089 query+=ntohl(dbH->dim);
1087 } 1090 }
1088 1091
1089 gettimeofday(&tv2, NULL); 1092 gettimeofday(&tv2, NULL);
1090 if(verbosity>1) { 1093 if(verbosity>1) {
1091 cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; 1094 cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
1095 // Output answer 1098 // Output answer
1096 // Loop over nearest neighbours 1099 // Loop over nearest neighbours
1097 for(k=0; k < pointNN; k++){ 1100 for(k=0; k < pointNN; k++){
1098 // Scan for key 1101 // Scan for key
1099 unsigned cumTrack=0; 1102 unsigned cumTrack=0;
1100 for(l=0 ; l<dbH->numFiles; l++){ 1103 for(l=0 ; l<ntohl(dbH->numFiles); l++){
1101 cumTrack+=trackTable[l]; 1104 cumTrack+=trackTable[l];
1102 if(sIndexes[k]<cumTrack){ 1105 if(sIndexes[k]<cumTrack){
1103 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " " 1106 cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " "
1104 << sIndexes[k]+trackTable[l]-cumTrack << endl; 1107 << sIndexes[k]+trackTable[l]-cumTrack << endl;
1105 break; 1108 break;
1126 for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){ 1129 for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){
1127 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; 1130 adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
1128 adbQueryResult->Dist[k]=distances[k]; 1131 adbQueryResult->Dist[k]=distances[k];
1129 adbQueryResult->Qpos[k]=qIndexes[k]; 1132 adbQueryResult->Qpos[k]=qIndexes[k];
1130 unsigned cumTrack=0; 1133 unsigned cumTrack=0;
1131 for(l=0 ; l<dbH->numFiles; l++){ 1134 for(l=0 ; l<ntohl(dbH->numFiles); l++){
1132 cumTrack+=trackTable[l]; 1135 cumTrack+=trackTable[l];
1133 if(sIndexes[k]<cumTrack){ 1136 if(sIndexes[k]<cumTrack){
1134 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); 1137 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE);
1135 break; 1138 break;
1136 } 1139 }
1155 // uses average of pointNN points per track 1158 // uses average of pointNN points per track
1156 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ 1159 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
1157 initTables(dbName, 0, inFile); 1160 initTables(dbName, 0, inFile);
1158 1161
1159 // For each input vector, find the closest pointNN matching output vectors and report 1162 // For each input vector, find the closest pointNN matching output vectors and report
1160 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); 1163 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim));
1161 double* query = (double*)(indata+sizeof(int)); 1164 double* query = (double*)(indata+sizeof(int));
1162 double* data = dataBuf; 1165 double* data = dataBuf;
1163 double* queryCopy = 0; 1166 double* queryCopy = 0;
1164 1167
1165 if( dbH->flags & O2_FLAG_L2NORM ){ 1168 if( ntohl(dbH->flags) & O2_FLAG_L2NORM ){
1166 // Make a copy of the query 1169 // Make a copy of the query
1167 queryCopy = new double[numVectors*dbH->dim]; 1170 queryCopy = new double[numVectors*ntohl(dbH->dim)];
1168 qNorm = new double[numVectors]; 1171 qNorm = new double[numVectors];
1169 assert(queryCopy&&qNorm); 1172 assert(queryCopy&&qNorm);
1170 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); 1173 memcpy(queryCopy, query, numVectors*ntohl(dbH->dim)*sizeof(double));
1171 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); 1174 unitNorm(queryCopy, ntohl(dbH->dim), numVectors, qNorm);
1172 query = queryCopy; 1175 query = queryCopy;
1173 } 1176 }
1174 1177
1175 assert(pointNN>0 && pointNN<=O2_MAXNN); 1178 assert(pointNN>0 && pointNN<=O2_MAXNN);
1176 assert(trackNN>0 && trackNN<=O2_MAXNN); 1179 assert(trackNN>0 && trackNN<=O2_MAXNN);
1204 1207
1205 double meanQdur = 0; 1208 double meanQdur = 0;
1206 double* timesdata = 0; 1209 double* timesdata = 0;
1207 double* meanDBdur = 0; 1210 double* meanDBdur = 0;
1208 1211
1209 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ 1212 if(usingTimes && !(ntohl(dbH->flags) & O2_FLAG_TIMES)){
1210 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; 1213 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
1211 usingTimes=0; 1214 usingTimes=0;
1212 } 1215 }
1213 1216
1214 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 1217 else if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES))
1215 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; 1218 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
1216 1219
1217 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ 1220 else if(usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)){
1218 timesdata = new double[numVectors]; 1221 timesdata = new double[numVectors];
1219 insertTimeStamps(numVectors, timesFile, timesdata); 1222 insertTimeStamps(numVectors, timesFile, timesdata);
1220 // Calculate durations of points 1223 // Calculate durations of points
1221 for(k=0; k<numVectors-1; k++){ 1224 for(k=0; k<numVectors-1; k++){
1222 timesdata[k]=timesdata[k+1]-timesdata[k]; 1225 timesdata[k]=timesdata[k+1]-timesdata[k];
1223 meanQdur+=timesdata[k]; 1226 meanQdur+=timesdata[k];
1224 } 1227 }
1225 meanQdur/=k; 1228 meanQdur/=k;
1226 meanDBdur = new double[dbH->numFiles]; 1229 meanDBdur = new double[ntohl(dbH->numFiles)];
1227 for(k=0; k<dbH->numFiles; k++){ 1230 for(k=0; k<ntohl(dbH->numFiles); k++){
1228 meanDBdur[k]=0.0; 1231 meanDBdur[k]=0.0;
1229 for(j=0; j<trackTable[k]-1 ; j++) 1232 for(j=0; j<trackTable[k]-1 ; j++)
1230 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; 1233 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
1231 meanDBdur[k]/=j; 1234 meanDBdur[k]/=j;
1232 } 1235 }
1237 error("queryPoint > numVectors in query"); 1240 error("queryPoint > numVectors in query");
1238 else{ 1241 else{
1239 if(verbosity>1) { 1242 if(verbosity>1) {
1240 cerr << "query point: " << queryPoint << endl; cerr.flush(); 1243 cerr << "query point: " << queryPoint << endl; cerr.flush();
1241 } 1244 }
1242 query=query+queryPoint*dbH->dim; 1245 query=query+queryPoint*ntohl(dbH->dim);
1243 numVectors=queryPoint+1; 1246 numVectors=queryPoint+1;
1244 } 1247 }
1245 1248
1246 // build track offset table 1249 // build track offset table
1247 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; 1250 unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)];
1248 unsigned cumTrack=0; 1251 unsigned cumTrack=0;
1249 unsigned trackIndexOffset; 1252 unsigned trackIndexOffset;
1250 for(k=0; k<dbH->numFiles;k++){ 1253 for(k=0; k<ntohl(dbH->numFiles);k++){
1251 trackOffsetTable[k]=cumTrack; 1254 trackOffsetTable[k]=cumTrack;
1252 cumTrack+=trackTable[k]*dbH->dim; 1255 cumTrack+=trackTable[k]*ntohl(dbH->dim);
1253 } 1256 }
1254 1257
1255 char nextKey[MAXSTR]; 1258 char nextKey[MAXSTR];
1256 1259
1257 gettimeofday(&tv1, NULL); 1260 gettimeofday(&tv1, NULL);
1258 1261
1259 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ 1262 for(processedTracks=0, track=0 ; processedTracks < ntohl(dbH->numFiles) ; track++, processedTracks++){
1260 if(trackFile){ 1263 if(trackFile){
1261 if(!trackFile->eof()){ 1264 if(!trackFile->eof()){
1262 trackFile->getline(nextKey,MAXSTR); 1265 trackFile->getline(nextKey,MAXSTR);
1263 track=getKeyPos(nextKey); 1266 track=getKeyPos(nextKey);
1264 } 1267 }
1265 else 1268 else
1266 break; 1269 break;
1267 } 1270 }
1268 trackOffset=trackOffsetTable[track]; // numDoubles offset 1271 trackOffset=trackOffsetTable[track]; // numDoubles offset
1269 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset 1272 trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset
1270 if(verbosity>7) { 1273 if(verbosity>7) {
1271 cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush(); 1274 cerr << track << "." << trackOffset/(ntohl(dbH->dim)) << "." << trackTable[track] << " | ";cerr.flush();
1272 } 1275 }
1273 1276
1274 if(dbH->flags & O2_FLAG_L2NORM) 1277 if(ntohl(dbH->flags) & O2_FLAG_L2NORM)
1275 usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy; 1278 usingQueryPoint?query=queryCopy+queryPoint*ntohl(dbH->dim):query=queryCopy;
1276 else 1279 else
1277 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); 1280 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*ntohl(dbH->dim):query=(double*)(indata+sizeof(int));
1278 if(usingQueryPoint) 1281 if(usingQueryPoint)
1279 j=1; 1282 j=1;
1280 else 1283 else
1281 j=numVectors; 1284 j=numVectors;
1282 while(j--){ 1285 while(j--){
1283 k=trackTable[track]; // number of vectors in track 1286 k=trackTable[track]; // number of vectors in track
1284 data=dataBuf+trackOffset; // data for track 1287 data=dataBuf+trackOffset; // data for track
1285 while(k--){ 1288 while(k--){
1286 thisDist=0; 1289 thisDist=0;
1287 l=dbH->dim; 1290 l=ntohl(dbH->dim);
1288 double* q=query; 1291 double* q=query;
1289 while(l--) 1292 while(l--)
1290 thisDist+=*q++**data++; 1293 thisDist+=*q++**data++;
1291 if(!usingTimes || 1294 if(!usingTimes ||
1292 (usingTimes 1295 (usingTimes
1311 break; 1314 break;
1312 } 1315 }
1313 } 1316 }
1314 } // track 1317 } // track
1315 // Move query pointer to next query point 1318 // Move query pointer to next query point
1316 query+=dbH->dim; 1319 query+=ntohl(dbH->dim);
1317 } // query 1320 } // query
1318 // Take the average of this track's distance 1321 // Take the average of this track's distance
1319 // Test the track distances 1322 // Test the track distances
1320 thisDist=0; 1323 thisDist=0;
1321 for (n = 0; n < pointNN; n++) { 1324 for (n = 0; n < pointNN; n++) {
1411 1414
1412 initTables(dbName, 0, inFile); 1415 initTables(dbName, 0, inFile);
1413 1416
1414 // For each input vector, find the closest pointNN matching output vectors and report 1417 // For each input vector, find the closest pointNN matching output vectors and report
1415 // we use stdout in this stub version 1418 // we use stdout in this stub version
1416 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); 1419 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim));
1417 double* query = (double*)(indata+sizeof(int)); 1420 double* query = (double*)(indata+sizeof(int));
1418 double* queryCopy = 0; 1421 double* queryCopy = 0;
1419 1422
1420 double qMeanL2; 1423 double qMeanL2;
1421 double* sMeanL2; 1424 double* sMeanL2;
1422 1425
1423 unsigned USE_THRESH=0; 1426 unsigned USE_THRESH=0;
1424 double SILENCE_THRESH=0; 1427 double SILENCE_THRESH=0;
1425 double DIFF_THRESH=0; 1428 double DIFF_THRESH=0;
1426 1429
1427 if(!(dbH->flags & O2_FLAG_L2NORM) ) 1430 if(!(ntohl(dbH->flags) & O2_FLAG_L2NORM) )
1428 error("Database must be L2 normed for sequence query","use -L2NORM"); 1431 error("Database must be L2 normed for sequence query","use -L2NORM");
1429 1432
1430 if(numVectors<sequenceLength) 1433 if(numVectors<sequenceLength)
1431 error("Query shorter than requested sequence length", "maybe use -l"); 1434 error("Query shorter than requested sequence length", "maybe use -l");
1432 1435
1433 if(verbosity>1) { 1436 if(verbosity>1) {
1434 cerr << "performing norms ... "; cerr.flush(); 1437 cerr << "performing norms ... "; cerr.flush();
1435 } 1438 }
1436 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); 1439 unsigned dbVectors = ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim));
1437 1440
1438 // Make a copy of the query 1441 // Make a copy of the query
1439 queryCopy = new double[numVectors*dbH->dim]; 1442 queryCopy = new double[numVectors*ntohl(dbH->dim)];
1440 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); 1443 memcpy(queryCopy, query, numVectors*ntohl(dbH->dim)*sizeof(double));
1441 qNorm = new double[numVectors]; 1444 qNorm = new double[numVectors];
1442 sNorm = new double[dbVectors]; 1445 sNorm = new double[dbVectors];
1443 sMeanL2=new double[dbH->numFiles]; 1446 sMeanL2=new double[ntohl(dbH->numFiles)];
1444 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); 1447 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength);
1445 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); 1448 unitNorm(queryCopy, ntohl(dbH->dim), numVectors, qNorm);
1446 query = queryCopy; 1449 query = queryCopy;
1447 1450
1448 // Make norm measurements relative to sequenceLength 1451 // Make norm measurements relative to sequenceLength
1449 unsigned w = sequenceLength-1; 1452 unsigned w = sequenceLength-1;
1450 unsigned i,j; 1453 unsigned i,j;
1452 double tmp1,tmp2; 1455 double tmp1,tmp2;
1453 1456
1454 // Copy the L2 norm values to core to avoid disk random access later on 1457 // Copy the L2 norm values to core to avoid disk random access later on
1455 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); 1458 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
1456 double* snPtr = sNorm; 1459 double* snPtr = sNorm;
1457 for(i=0; i<dbH->numFiles; i++){ 1460 for(i=0; i<ntohl(dbH->numFiles); i++){
1458 if(trackTable[i]>=sequenceLength){ 1461 if(trackTable[i]>=sequenceLength){
1459 tmp1=*snPtr; 1462 tmp1=*snPtr;
1460 j=1; 1463 j=1;
1461 w=sequenceLength-1; 1464 w=sequenceLength-1;
1462 while(w--) 1465 while(w--)
1478 } 1481 }
1479 snPtr+=trackTable[i]; 1482 snPtr+=trackTable[i];
1480 } 1483 }
1481 1484
1482 double* pn = sMeanL2; 1485 double* pn = sMeanL2;
1483 w=dbH->numFiles; 1486 w=ntohl(dbH->numFiles);
1484 while(w--) 1487 while(w--)
1485 *pn++=0.0; 1488 *pn++=0.0;
1486 ps=sNorm; 1489 ps=sNorm;
1487 unsigned processedTracks=0; 1490 unsigned processedTracks=0;
1488 for(i=0; i<dbH->numFiles; i++){ 1491 for(i=0; i<ntohl(dbH->numFiles); i++){
1489 if(trackTable[i]>sequenceLength-1){ 1492 if(trackTable[i]>sequenceLength-1){
1490 w = trackTable[i]-sequenceLength+1; 1493 w = trackTable[i]-sequenceLength+1;
1491 pn = sMeanL2+i; 1494 pn = sMeanL2+i;
1492 *pn=0; 1495 *pn=0;
1493 while(w--) 1496 while(w--)
1573 // Timestamp and durations processing 1576 // Timestamp and durations processing
1574 double meanQdur = 0; 1577 double meanQdur = 0;
1575 double* timesdata = 0; 1578 double* timesdata = 0;
1576 double* meanDBdur = 0; 1579 double* meanDBdur = 0;
1577 1580
1578 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ 1581 if(usingTimes && !(ntohl(dbH->flags) & O2_FLAG_TIMES)){
1579 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; 1582 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
1580 usingTimes=0; 1583 usingTimes=0;
1581 } 1584 }
1582 1585
1583 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 1586 else if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES))
1584 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; 1587 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
1585 1588
1586 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ 1589 else if(usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)){
1587 timesdata = new double[numVectors]; 1590 timesdata = new double[numVectors];
1588 assert(timesdata); 1591 assert(timesdata);
1589 insertTimeStamps(numVectors, timesFile, timesdata); 1592 insertTimeStamps(numVectors, timesFile, timesdata);
1590 // Calculate durations of points 1593 // Calculate durations of points
1591 for(k=0; k<numVectors-1; k++){ 1594 for(k=0; k<numVectors-1; k++){
1594 } 1597 }
1595 meanQdur/=k; 1598 meanQdur/=k;
1596 if(verbosity>1) { 1599 if(verbosity>1) {
1597 cerr << "mean query file duration: " << meanQdur << endl; 1600 cerr << "mean query file duration: " << meanQdur << endl;
1598 } 1601 }
1599 meanDBdur = new double[dbH->numFiles]; 1602 meanDBdur = new double[ntohl(dbH->numFiles)];
1600 assert(meanDBdur); 1603 assert(meanDBdur);
1601 for(k=0; k<dbH->numFiles; k++){ 1604 for(k=0; k<ntohl(dbH->numFiles); k++){
1602 meanDBdur[k]=0.0; 1605 meanDBdur[k]=0.0;
1603 for(j=0; j<trackTable[k]-1 ; j++) 1606 for(j=0; j<trackTable[k]-1 ; j++)
1604 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; 1607 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
1605 meanDBdur[k]/=j; 1608 meanDBdur[k]/=j;
1606 } 1609 }
1611 error("queryPoint > numVectors-wL+1 in query"); 1614 error("queryPoint > numVectors-wL+1 in query");
1612 else{ 1615 else{
1613 if(verbosity>1) { 1616 if(verbosity>1) {
1614 cerr << "query point: " << queryPoint << endl; cerr.flush(); 1617 cerr << "query point: " << queryPoint << endl; cerr.flush();
1615 } 1618 }
1616 query=query+queryPoint*dbH->dim; 1619 query=query+queryPoint*ntohl(dbH->dim);
1617 qNorm=qNorm+queryPoint; 1620 qNorm=qNorm+queryPoint;
1618 numVectors=wL; 1621 numVectors=wL;
1619 } 1622 }
1620 1623
1621 double ** D = 0; // Differences query and target 1624 double ** D = 0; // Differences query and target
1633 double* qp; 1636 double* qp;
1634 double* sp; 1637 double* sp;
1635 double* dp; 1638 double* dp;
1636 1639
1637 // build track offset table 1640 // build track offset table
1638 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; 1641 unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)];
1639 unsigned cumTrack=0; 1642 unsigned cumTrack=0;
1640 unsigned trackIndexOffset; 1643 unsigned trackIndexOffset;
1641 for(k=0; k<dbH->numFiles;k++){ 1644 for(k=0; k<ntohl(dbH->numFiles);k++){
1642 trackOffsetTable[k]=cumTrack; 1645 trackOffsetTable[k]=cumTrack;
1643 cumTrack+=trackTable[k]*dbH->dim; 1646 cumTrack+=trackTable[k]*ntohl(dbH->dim);
1644 } 1647 }
1645 1648
1646 char nextKey [MAXSTR]; 1649 char nextKey [MAXSTR];
1647 1650
1648 // chi^2 statistics 1651 // chi^2 statistics
1651 double logSampleSum = 0; 1654 double logSampleSum = 0;
1652 double minSample = 1e9; 1655 double minSample = 1e9;
1653 double maxSample = 0; 1656 double maxSample = 0;
1654 1657
1655 // Track loop 1658 // Track loop
1656 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ 1659 for(processedTracks=0, track=0 ; processedTracks < ntohl(dbH->numFiles) ; track++, processedTracks++){
1657 1660
1658 // get trackID from file if using a control file 1661 // get trackID from file if using a control file
1659 if(trackFile){ 1662 if(trackFile){
1660 if(!trackFile->eof()){ 1663 if(!trackFile->eof()){
1661 trackFile->getline(nextKey,MAXSTR); 1664 trackFile->getline(nextKey,MAXSTR);
1664 else 1667 else
1665 break; 1668 break;
1666 } 1669 }
1667 1670
1668 trackOffset=trackOffsetTable[track]; // numDoubles offset 1671 trackOffset=trackOffsetTable[track]; // numDoubles offset
1669 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset 1672 trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset
1670 1673
1671 if(sequenceLength<=trackTable[track]){ // test for short sequences 1674 if(sequenceLength<=trackTable[track]){ // test for short sequences
1672 1675
1673 if(verbosity>7) { 1676 if(verbosity>7) {
1674 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); 1677 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();
1688 } 1691 }
1689 1692
1690 // Dot product 1693 // Dot product
1691 for(j=0; j<numVectors; j++) 1694 for(j=0; j<numVectors; j++)
1692 for(k=0; k<trackTable[track]; k++){ 1695 for(k=0; k<trackTable[track]; k++){
1693 qp=query+j*dbH->dim; 1696 qp=query+j*ntohl(dbH->dim);
1694 sp=dataBuf+trackOffset+k*dbH->dim; 1697 sp=dataBuf+trackOffset+k*ntohl(dbH->dim);
1695 DD[j][k]=0.0; // Initialize matched filter array 1698 DD[j][k]=0.0; // Initialize matched filter array
1696 dp=&D[j][k]; // point to correlation cell j,k 1699 dp=&D[j][k]; // point to correlation cell j,k
1697 *dp=0.0; // initialize correlation cell 1700 *dp=0.0; // initialize correlation cell
1698 l=dbH->dim; // size of vectors 1701 l=ntohl(dbH->dim); // size of vectors
1699 while(l--) 1702 while(l--)
1700 *dp+=*qp++**sp++; 1703 *dp+=*qp++**sp++;
1701 } 1704 }
1702 1705
1703 // Matched Filter 1706 // Matched Filter
1913 1916
1914 initTables(dbName, 0, inFile); 1917 initTables(dbName, 0, inFile);
1915 1918
1916 // For each input vector, find the closest pointNN matching output vectors and report 1919 // For each input vector, find the closest pointNN matching output vectors and report
1917 // we use stdout in this stub version 1920 // we use stdout in this stub version
1918 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); 1921 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim));
1919 double* query = (double*)(indata+sizeof(int)); 1922 double* query = (double*)(indata+sizeof(int));
1920 double* queryCopy = 0; 1923 double* queryCopy = 0;
1921 1924
1922 double qMeanL2; 1925 double qMeanL2;
1923 double* sMeanL2; 1926 double* sMeanL2;
1924 1927
1925 unsigned USE_THRESH=0; 1928 unsigned USE_THRESH=0;
1926 double SILENCE_THRESH=0; 1929 double SILENCE_THRESH=0;
1927 double DIFF_THRESH=0; 1930 double DIFF_THRESH=0;
1928 1931
1929 if(!(dbH->flags & O2_FLAG_L2NORM) ) 1932 if(!(ntohl(dbH->flags) & O2_FLAG_L2NORM) )
1930 error("Database must be L2 normed for sequence query","use -l2norm"); 1933 error("Database must be L2 normed for sequence query","use -l2norm");
1931 1934
1932 if(verbosity>1) { 1935 if(verbosity>1) {
1933 cerr << "performing norms ... "; cerr.flush(); 1936 cerr << "performing norms ... "; cerr.flush();
1934 } 1937 }
1935 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); 1938 unsigned dbVectors = ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim));
1936 1939
1937 // Make a copy of the query 1940 // Make a copy of the query
1938 queryCopy = new double[numVectors*dbH->dim]; 1941 queryCopy = new double[numVectors*ntohl(dbH->dim)];
1939 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); 1942 memcpy(queryCopy, query, numVectors*ntohl(dbH->dim)*sizeof(double));
1940 qNorm = new double[numVectors]; 1943 qNorm = new double[numVectors];
1941 sNorm = new double[dbVectors]; 1944 sNorm = new double[dbVectors];
1942 sMeanL2=new double[dbH->numFiles]; 1945 sMeanL2=new double[ntohl(dbH->numFiles)];
1943 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); 1946 assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength);
1944 unitNorm(queryCopy, dbH->dim, numVectors, qNorm); 1947 unitNorm(queryCopy, ntohl(dbH->dim), numVectors, qNorm);
1945 query = queryCopy; 1948 query = queryCopy;
1946 1949
1947 // Make norm measurements relative to sequenceLength 1950 // Make norm measurements relative to sequenceLength
1948 unsigned w = sequenceLength-1; 1951 unsigned w = sequenceLength-1;
1949 unsigned i,j; 1952 unsigned i,j;
1951 double tmp1,tmp2; 1954 double tmp1,tmp2;
1952 1955
1953 // Copy the L2 norm values to core to avoid disk random access later on 1956 // Copy the L2 norm values to core to avoid disk random access later on
1954 memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); 1957 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
1955 double* snPtr = sNorm; 1958 double* snPtr = sNorm;
1956 for(i=0; i<dbH->numFiles; i++){ 1959 for(i=0; i<ntohl(dbH->numFiles); i++){
1957 if(trackTable[i]>=sequenceLength){ 1960 if(trackTable[i]>=sequenceLength){
1958 tmp1=*snPtr; 1961 tmp1=*snPtr;
1959 j=1; 1962 j=1;
1960 w=sequenceLength-1; 1963 w=sequenceLength-1;
1961 while(w--) 1964 while(w--)
1977 } 1980 }
1978 snPtr+=trackTable[i]; 1981 snPtr+=trackTable[i];
1979 } 1982 }
1980 1983
1981 double* pn = sMeanL2; 1984 double* pn = sMeanL2;
1982 w=dbH->numFiles; 1985 w=ntohl(dbH->numFiles);
1983 while(w--) 1986 while(w--)
1984 *pn++=0.0; 1987 *pn++=0.0;
1985 ps=sNorm; 1988 ps=sNorm;
1986 unsigned processedTracks=0; 1989 unsigned processedTracks=0;
1987 for(i=0; i<dbH->numFiles; i++){ 1990 for(i=0; i<ntohl(dbH->numFiles); i++){
1988 if(trackTable[i]>sequenceLength-1){ 1991 if(trackTable[i]>sequenceLength-1){
1989 w = trackTable[i]-sequenceLength+1; 1992 w = trackTable[i]-sequenceLength+1;
1990 pn = sMeanL2+i; 1993 pn = sMeanL2+i;
1991 *pn=0; 1994 *pn=0;
1992 while(w--) 1995 while(w--)
2072 // Timestamp and durations processing 2075 // Timestamp and durations processing
2073 double meanQdur = 0; 2076 double meanQdur = 0;
2074 double* timesdata = 0; 2077 double* timesdata = 0;
2075 double* meanDBdur = 0; 2078 double* meanDBdur = 0;
2076 2079
2077 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ 2080 if(usingTimes && !(ntohl(dbH->flags) & O2_FLAG_TIMES)){
2078 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; 2081 cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
2079 usingTimes=0; 2082 usingTimes=0;
2080 } 2083 }
2081 2084
2082 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 2085 else if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES))
2083 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; 2086 cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
2084 2087
2085 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ 2088 else if(usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)){
2086 timesdata = new double[numVectors]; 2089 timesdata = new double[numVectors];
2087 assert(timesdata); 2090 assert(timesdata);
2088 insertTimeStamps(numVectors, timesFile, timesdata); 2091 insertTimeStamps(numVectors, timesFile, timesdata);
2089 // Calculate durations of points 2092 // Calculate durations of points
2090 for(k=0; k<numVectors-1; k++){ 2093 for(k=0; k<numVectors-1; k++){
2093 } 2096 }
2094 meanQdur/=k; 2097 meanQdur/=k;
2095 if(verbosity>1) { 2098 if(verbosity>1) {
2096 cerr << "mean query file duration: " << meanQdur << endl; 2099 cerr << "mean query file duration: " << meanQdur << endl;
2097 } 2100 }
2098 meanDBdur = new double[dbH->numFiles]; 2101 meanDBdur = new double[ntohl(dbH->numFiles)];
2099 assert(meanDBdur); 2102 assert(meanDBdur);
2100 for(k=0; k<dbH->numFiles; k++){ 2103 for(k=0; k<ntohl(dbH->numFiles); k++){
2101 meanDBdur[k]=0.0; 2104 meanDBdur[k]=0.0;
2102 for(j=0; j<trackTable[k]-1 ; j++) 2105 for(j=0; j<trackTable[k]-1 ; j++)
2103 meanDBdur[k]+=timesTable[j+1]-timesTable[j]; 2106 meanDBdur[k]+=timesTable[j+1]-timesTable[j];
2104 meanDBdur[k]/=j; 2107 meanDBdur[k]/=j;
2105 } 2108 }
2110 error("queryPoint > numVectors-wL+1 in query"); 2113 error("queryPoint > numVectors-wL+1 in query");
2111 else{ 2114 else{
2112 if(verbosity>1) { 2115 if(verbosity>1) {
2113 cerr << "query point: " << queryPoint << endl; cerr.flush(); 2116 cerr << "query point: " << queryPoint << endl; cerr.flush();
2114 } 2117 }
2115 query=query+queryPoint*dbH->dim; 2118 query=query+queryPoint*ntohl(dbH->dim);
2116 qNorm=qNorm+queryPoint; 2119 qNorm=qNorm+queryPoint;
2117 numVectors=wL; 2120 numVectors=wL;
2118 } 2121 }
2119 2122
2120 double ** D = 0; // Differences query and target 2123 double ** D = 0; // Differences query and target
2132 double* qp; 2135 double* qp;
2133 double* sp; 2136 double* sp;
2134 double* dp; 2137 double* dp;
2135 2138
2136 // build track offset table 2139 // build track offset table
2137 unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; 2140 unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)];
2138 unsigned cumTrack=0; 2141 unsigned cumTrack=0;
2139 unsigned trackIndexOffset; 2142 unsigned trackIndexOffset;
2140 for(k=0; k<dbH->numFiles;k++){ 2143 for(k=0; k<ntohl(dbH->numFiles);k++){
2141 trackOffsetTable[k]=cumTrack; 2144 trackOffsetTable[k]=cumTrack;
2142 cumTrack+=trackTable[k]*dbH->dim; 2145 cumTrack+=trackTable[k]*ntohl(dbH->dim);
2143 } 2146 }
2144 2147
2145 char nextKey [MAXSTR]; 2148 char nextKey [MAXSTR];
2146 2149
2147 // chi^2 statistics 2150 // chi^2 statistics
2150 double logSampleSum = 0; 2153 double logSampleSum = 0;
2151 double minSample = 1e9; 2154 double minSample = 1e9;
2152 double maxSample = 0; 2155 double maxSample = 0;
2153 2156
2154 // Track loop 2157 // Track loop
2155 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ 2158 for(processedTracks=0, track=0 ; processedTracks < ntohl(dbH->numFiles) ; track++, processedTracks++){
2156 2159
2157 // get trackID from file if using a control file 2160 // get trackID from file if using a control file
2158 if(trackFile){ 2161 if(trackFile){
2159 if(!trackFile->eof()){ 2162 if(!trackFile->eof()){
2160 trackFile->getline(nextKey,MAXSTR); 2163 trackFile->getline(nextKey,MAXSTR);
2163 else 2166 else
2164 break; 2167 break;
2165 } 2168 }
2166 2169
2167 trackOffset=trackOffsetTable[track]; // numDoubles offset 2170 trackOffset=trackOffsetTable[track]; // numDoubles offset
2168 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset 2171 trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset
2169 2172
2170 if(sequenceLength<=trackTable[track]){ // test for short sequences 2173 if(sequenceLength<=trackTable[track]){ // test for short sequences
2171 2174
2172 if(verbosity>7) { 2175 if(verbosity>7) {
2173 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); 2176 cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();
2187 } 2190 }
2188 2191
2189 // Dot product 2192 // Dot product
2190 for(j=0; j<numVectors; j++) 2193 for(j=0; j<numVectors; j++)
2191 for(k=0; k<trackTable[track]; k++){ 2194 for(k=0; k<trackTable[track]; k++){
2192 qp=query+j*dbH->dim; 2195 qp=query+j*ntohl(dbH->dim);
2193 sp=dataBuf+trackOffset+k*dbH->dim; 2196 sp=dataBuf+trackOffset+k*ntohl(dbH->dim);
2194 DD[j][k]=0.0; // Initialize matched filter array 2197 DD[j][k]=0.0; // Initialize matched filter array
2195 dp=&D[j][k]; // point to correlation cell j,k 2198 dp=&D[j][k]; // point to correlation cell j,k
2196 *dp=0.0; // initialize correlation cell 2199 *dp=0.0; // initialize correlation cell
2197 l=dbH->dim; // size of vectors 2200 l=ntohl(dbH->dim); // size of vectors
2198 while(l--) 2201 while(l--)
2199 *dp+=*qp++**sp++; 2202 *dp+=*qp++**sp++;
2200 } 2203 }
2201 2204
2202 // Matched Filter 2205 // Matched Filter
2419 double *p; 2422 double *p;
2420 unsigned nn = n; 2423 unsigned nn = n;
2421 2424
2422 assert(l2normTable); 2425 assert(l2normTable);
2423 2426
2424 if( !append && (dbH->flags & O2_FLAG_L2NORM) ) 2427 if( !append && (ntohl(dbH->flags) & O2_FLAG_L2NORM) )
2425 error("Database is already L2 normed", "automatic norm on insert is enabled"); 2428 error("Database is already L2 normed", "automatic norm on insert is enabled");
2426 2429
2427 if(verbosity>2) { 2430 if(verbosity>2) {
2428 cerr << "norming " << n << " vectors...";cerr.flush(); 2431 cerr << "norming " << n << " vectors...";cerr.flush();
2429 } 2432 }
2456 if(append) { 2459 if(append) {
2457 // FIXME: a hack, a very palpable hack: the vectors have already 2460 // FIXME: a hack, a very palpable hack: the vectors have already
2458 // been inserted, and dbH->length has already been updated. We 2461 // been inserted, and dbH->length has already been updated. We
2459 // need to subtract off again the number of vectors that we've 2462 // need to subtract off again the number of vectors that we've
2460 // inserted this time... 2463 // inserted this time...
2461 offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors 2464 offset=(ntohl(dbH->length)/(ntohl(dbH->dim)*sizeof(double)))-n; // number of vectors
2462 } else { 2465 } else {
2463 offset=0; 2466 offset=0;
2464 } 2467 }
2465 memcpy(l2normTable+offset, l2buf, n*sizeof(double)); 2468 memcpy(l2normTable+offset, l2buf, n*sizeof(double));
2466 if(l2buf) 2469 if(l2buf)