comparison audioDB.cpp @ 112:ae045842d29f audiodb-debian

Merge trunk changes -r123:128 to audiodb-debian branch (+ new debian/changelog version)
author mas01cr
date Fri, 05 Oct 2007 15:42:50 +0000
parents 1521d46bc1ac
children 8d0942525fab
comparison
equal deleted inserted replaced
107:1521d46bc1ac 112:ae045842d29f
418 418
419 dbH = new dbTableHeaderT(); 419 dbH = new dbTableHeaderT();
420 assert(dbH); 420 assert(dbH);
421 421
422 // Initialize header 422 // Initialize header
423 dbH->magic=O2_MAGIC; 423 dbH->magic = O2_MAGIC;
424 dbH->numFiles=0; 424 dbH->version = O2_FORMAT_VERSION;
425 dbH->length=0; 425 dbH->numFiles = 0;
426 dbH->dim=0; 426 dbH->dim = 0;
427 dbH->flags=0; //O2_FLAG_L2NORM; 427 dbH->flags = 0;
428 dbH->length = 0;
429 dbH->fileTableOffset = ALIGN_UP(O2_HEADERSIZE, 8);
430 dbH->trackTableOffset = ALIGN_UP(dbH->fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES, 8);
431 dbH->dataOffset = ALIGN_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES, 8);
432 dbH->l2normTableOffset = ALIGN_DOWN(O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8);
433 dbH->timesTableOffset = ALIGN_DOWN(dbH->l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8);
428 434
429 memcpy (db, dbH, O2_HEADERSIZE); 435 memcpy (db, dbH, O2_HEADERSIZE);
430 if(verbosity) { 436 if(verbosity) {
431 cerr << COM_CREATE << " " << dbName << endl; 437 cerr << COM_CREATE << " " << dbName << endl;
432 } 438 }
433 } 439 }
434 440
435 441
436 void audioDB::drop(){ 442 void audioDB::drop(){
437 443 // FIXME: drop something? Should we even allow this?
438
439 } 444 }
440 445
441 // initTables - memory map files passed as arguments 446 // initTables - memory map files passed as arguments
442 // Precondition: database has already been created 447 // Precondition: database has already been created
443 void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) { 448 void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) {
460 465
461 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) { 466 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) {
462 error("error reading db header", dbName, "read"); 467 error("error reading db header", dbName, "read");
463 } 468 }
464 469
465 fileTableOffset = O2_HEADERSIZE; 470 if(dbH->magic == O2_OLD_MAGIC) {
466 trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; 471 // FIXME: if anyone ever complains, write the program to convert
467 dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES; 472 // from the old audioDB format to the new...
468 l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); 473 error("database file has old O2 header", dbName);
469 timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); 474 }
470 475
471 if(dbH->magic != O2_MAGIC) { 476 if(dbH->magic != O2_MAGIC) {
472 cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << endl; 477 cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << endl;
473 error("database file has incorrect header",dbName); 478 error("database file has incorrect header", dbName);
479 }
480
481 if(dbH->version != O2_FORMAT_VERSION) {
482 error("database file has incorect version", dbName);
474 } 483 }
475 484
476 if(inFile) 485 if(inFile)
477 if(dbH->dim == 0 && dbH->length == 0) // empty database 486 if(dbH->dim == 0 && dbH->length == 0) // empty database
478 // initialize with input dimensionality 487 // initialize with input dimensionality
495 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0), 504 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0),
496 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) 505 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
497 error("mmap error for initting tables of database", "", "mmap"); 506 error("mmap error for initting tables of database", "", "mmap");
498 507
499 // Make some handy tables with correct types 508 // Make some handy tables with correct types
500 fileTable= (char*)(db+fileTableOffset); 509 fileTable= (char*)(db+dbH->fileTableOffset);
501 trackTable = (unsigned*)(db+trackTableOffset); 510 trackTable = (unsigned*)(db+dbH->trackTableOffset);
502 dataBuf = (double*)(db+dataoffset); 511 dataBuf = (double*)(db+dbH->dataOffset);
503 l2normTable = (double*)(db+l2normTableOffset); 512 l2normTable = (double*)(db+dbH->l2normTableOffset);
504 timesTable = (double*)(db+timesTableOffset); 513 timesTable = (double*)(db+dbH->timesTableOffset);
505 } 514 }
506 515
507 void audioDB::insert(const char* dbName, const char* inFile){ 516 void audioDB::insert(const char* dbName, const char* inFile){
508 517
509 initTables(dbName, 1, inFile); 518 initTables(dbName, 1, inFile);
567 // Update track to file index map 576 // Update track to file index map
568 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); 577 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned));
569 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); 578 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
570 579
571 // Update the feature database 580 // Update the feature database
572 memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); 581 memcpy (db+dbH->dataOffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int));
573 582
574 // Norm the vectors on input if the database is already L2 normed 583 // Norm the vectors on input if the database is already L2 normed
575 if(dbH->flags & O2_FLAG_L2NORM) 584 if(dbH->flags & O2_FLAG_L2NORM)
576 unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append 585 unitNormAndInsertL2((double*)(db+dbH->dataOffset+insertoffset), dbH->dim, numVectors, 1); // append
577 586
578 // Report status 587 // Report status
579 status(dbName); 588 status(dbName);
580 if(verbosity) { 589 if(verbosity) {
581 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors " 590 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors "
666 error("error reading db header"); 675 error("error reading db header");
667 676
668 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 677 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
669 error("Must use timestamps with timestamped database","use --times"); 678 error("Must use timestamps with timestamped database","use --times");
670 679
671 fileTableOffset = O2_HEADERSIZE;
672 trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES;
673 dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES;
674 l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);
675 timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);
676
677 if(dbH->magic!=O2_MAGIC){ 680 if(dbH->magic!=O2_MAGIC){
678 cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl; 681 cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl;
679 error("database file has incorrect header",dbName); 682 error("database file has incorrect header",dbName);
680 } 683 }
681
682 684
683 unsigned totalVectors=0; 685 unsigned totalVectors=0;
684 char *thisKey = new char[MAXSTR]; 686 char *thisKey = new char[MAXSTR];
685 char *thisFile = new char[MAXSTR]; 687 char *thisFile = new char[MAXSTR];
686 char *thisTimesFileName = new char[MAXSTR]; 688 char *thisTimesFileName = new char[MAXSTR];
709 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, 711 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
710 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) 712 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
711 error("mmap error for batchinsert into database", "", "mmap"); 713 error("mmap error for batchinsert into database", "", "mmap");
712 714
713 // Make some handy tables with correct types 715 // Make some handy tables with correct types
714 fileTable= (char*)(db+fileTableOffset); 716 fileTable= (char*)(db+dbH->fileTableOffset);
715 trackTable = (unsigned*)(db+trackTableOffset); 717 trackTable = (unsigned*)(db+dbH->trackTableOffset);
716 dataBuf = (double*)(db+dataoffset); 718 dataBuf = (double*)(db+dbH->dataOffset);
717 l2normTable = (double*)(db+l2normTableOffset); 719 l2normTable = (double*)(db+dbH->l2normTableOffset);
718 timesTable = (double*)(db+timesTableOffset); 720 timesTable = (double*)(db+dbH->timesTableOffset);
719 721
720 // Check that there is room for at least 1 more file 722 // Check that there is room for at least 1 more file
721 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) 723 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int))))
722 error("No more room in database","insert failed: reason database is full."); 724 error("No more room in database","insert failed: reason database is full.");
723 725
793 // Update track to file index map 795 // Update track to file index map
794 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); 796 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned));
795 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); 797 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
796 798
797 // Update the feature database 799 // Update the feature database
798 memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); 800 memcpy (db+dbH->dataOffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int));
799 801
800 // Norm the vectors on input if the database is already L2 normed 802 // Norm the vectors on input if the database is already L2 normed
801 if(dbH->flags & O2_FLAG_L2NORM) 803 if(dbH->flags & O2_FLAG_L2NORM)
802 unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append 804 unitNormAndInsertL2((double*)(db+dbH->dataOffset+insertoffset), dbH->dim, numVectors, 1); // append
803 805
804 totalVectors+=numVectors; 806 totalVectors+=numVectors;
805 } 807 }
806 } 808 }
807 // CLEAN UP 809 // CLEAN UP
893 // Update Header information 895 // Update Header information
894 cout << "num files:" << dbH->numFiles << endl; 896 cout << "num files:" << dbH->numFiles << endl;
895 cout << "data dim:" << dbH->dim <<endl; 897 cout << "data dim:" << dbH->dim <<endl;
896 if(dbH->dim>0){ 898 if(dbH->dim>0){
897 cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl; 899 cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl;
898 cout << "vectors available:" << (timesTableOffset-(dataoffset+dbH->length))/(sizeof(double)*dbH->dim) << endl; 900 cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << endl;
899 } 901 }
900 cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(timesTableOffset-dataoffset) << "%)" << endl; 902 cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl;
901 cout << "bytes available:" << timesTableOffset-(dataoffset+dbH->length) << " (" << 903 cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" <<
902 (100.0*(timesTableOffset-(dataoffset+dbH->length)))/(timesTableOffset-dataoffset) << "%)" << endl; 904 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl;
903 cout << "flags:" << dbH->flags << endl; 905 cout << "flags:" << dbH->flags << endl;
904 906
905 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; 907 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl;
906 } else { 908 } else {
907 adbStatusResult->numFiles = dbH->numFiles; 909 adbStatusResult->numFiles = dbH->numFiles;