Mercurial > hg > audiodb
comparison audioDB.cpp @ 112:ae045842d29f audiodb-debian
Merge trunk changes -r123:128 to audiodb-debian branch
(+ new debian/changelog version)
author | mas01cr |
---|---|
date | Fri, 05 Oct 2007 15:42:50 +0000 |
parents | 1521d46bc1ac |
children | 8d0942525fab |
comparison legend:
equal
deleted
inserted
replaced
107:1521d46bc1ac | 112:ae045842d29f |
---|---|
418 | 418 |
419 dbH = new dbTableHeaderT(); | 419 dbH = new dbTableHeaderT(); |
420 assert(dbH); | 420 assert(dbH); |
421 | 421 |
422 // Initialize header | 422 // Initialize header |
423 dbH->magic=O2_MAGIC; | 423 dbH->magic = O2_MAGIC; |
424 dbH->numFiles=0; | 424 dbH->version = O2_FORMAT_VERSION; |
425 dbH->length=0; | 425 dbH->numFiles = 0; |
426 dbH->dim=0; | 426 dbH->dim = 0; |
427 dbH->flags=0; //O2_FLAG_L2NORM; | 427 dbH->flags = 0; |
428 dbH->length = 0; | |
429 dbH->fileTableOffset = ALIGN_UP(O2_HEADERSIZE, 8); | |
430 dbH->trackTableOffset = ALIGN_UP(dbH->fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES, 8); | |
431 dbH->dataOffset = ALIGN_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES, 8); | |
432 dbH->l2normTableOffset = ALIGN_DOWN(O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8); | |
433 dbH->timesTableOffset = ALIGN_DOWN(dbH->l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8); | |
428 | 434 |
429 memcpy (db, dbH, O2_HEADERSIZE); | 435 memcpy (db, dbH, O2_HEADERSIZE); |
430 if(verbosity) { | 436 if(verbosity) { |
431 cerr << COM_CREATE << " " << dbName << endl; | 437 cerr << COM_CREATE << " " << dbName << endl; |
432 } | 438 } |
433 } | 439 } |
434 | 440 |
435 | 441 |
436 void audioDB::drop(){ | 442 void audioDB::drop(){ |
437 | 443 // FIXME: drop something? Should we even allow this? |
438 | |
439 } | 444 } |
440 | 445 |
441 // initTables - memory map files passed as arguments | 446 // initTables - memory map files passed as arguments |
442 // Precondition: database has already been created | 447 // Precondition: database has already been created |
443 void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) { | 448 void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) { |
460 | 465 |
461 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) { | 466 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) { |
462 error("error reading db header", dbName, "read"); | 467 error("error reading db header", dbName, "read"); |
463 } | 468 } |
464 | 469 |
465 fileTableOffset = O2_HEADERSIZE; | 470 if(dbH->magic == O2_OLD_MAGIC) { |
466 trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; | 471 // FIXME: if anyone ever complains, write the program to convert |
467 dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES; | 472 // from the old audioDB format to the new... |
468 l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); | 473 error("database file has old O2 header", dbName); |
469 timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); | 474 } |
470 | 475 |
471 if(dbH->magic != O2_MAGIC) { | 476 if(dbH->magic != O2_MAGIC) { |
472 cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << endl; | 477 cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << endl; |
473 error("database file has incorrect header",dbName); | 478 error("database file has incorrect header", dbName); |
479 } | |
480 | |
481 if(dbH->version != O2_FORMAT_VERSION) { | |
482 error("database file has incorect version", dbName); | |
474 } | 483 } |
475 | 484 |
476 if(inFile) | 485 if(inFile) |
477 if(dbH->dim == 0 && dbH->length == 0) // empty database | 486 if(dbH->dim == 0 && dbH->length == 0) // empty database |
478 // initialize with input dimensionality | 487 // initialize with input dimensionality |
495 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0), | 504 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0), |
496 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) | 505 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) |
497 error("mmap error for initting tables of database", "", "mmap"); | 506 error("mmap error for initting tables of database", "", "mmap"); |
498 | 507 |
499 // Make some handy tables with correct types | 508 // Make some handy tables with correct types |
500 fileTable= (char*)(db+fileTableOffset); | 509 fileTable= (char*)(db+dbH->fileTableOffset); |
501 trackTable = (unsigned*)(db+trackTableOffset); | 510 trackTable = (unsigned*)(db+dbH->trackTableOffset); |
502 dataBuf = (double*)(db+dataoffset); | 511 dataBuf = (double*)(db+dbH->dataOffset); |
503 l2normTable = (double*)(db+l2normTableOffset); | 512 l2normTable = (double*)(db+dbH->l2normTableOffset); |
504 timesTable = (double*)(db+timesTableOffset); | 513 timesTable = (double*)(db+dbH->timesTableOffset); |
505 } | 514 } |
506 | 515 |
507 void audioDB::insert(const char* dbName, const char* inFile){ | 516 void audioDB::insert(const char* dbName, const char* inFile){ |
508 | 517 |
509 initTables(dbName, 1, inFile); | 518 initTables(dbName, 1, inFile); |
567 // Update track to file index map | 576 // Update track to file index map |
568 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); | 577 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); |
569 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); | 578 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); |
570 | 579 |
571 // Update the feature database | 580 // Update the feature database |
572 memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); | 581 memcpy (db+dbH->dataOffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); |
573 | 582 |
574 // Norm the vectors on input if the database is already L2 normed | 583 // Norm the vectors on input if the database is already L2 normed |
575 if(dbH->flags & O2_FLAG_L2NORM) | 584 if(dbH->flags & O2_FLAG_L2NORM) |
576 unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append | 585 unitNormAndInsertL2((double*)(db+dbH->dataOffset+insertoffset), dbH->dim, numVectors, 1); // append |
577 | 586 |
578 // Report status | 587 // Report status |
579 status(dbName); | 588 status(dbName); |
580 if(verbosity) { | 589 if(verbosity) { |
581 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors " | 590 cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors " |
666 error("error reading db header"); | 675 error("error reading db header"); |
667 | 676 |
668 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | 677 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) |
669 error("Must use timestamps with timestamped database","use --times"); | 678 error("Must use timestamps with timestamped database","use --times"); |
670 | 679 |
671 fileTableOffset = O2_HEADERSIZE; | |
672 trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; | |
673 dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES; | |
674 l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); | |
675 timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); | |
676 | |
677 if(dbH->magic!=O2_MAGIC){ | 680 if(dbH->magic!=O2_MAGIC){ |
678 cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl; | 681 cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl; |
679 error("database file has incorrect header",dbName); | 682 error("database file has incorrect header",dbName); |
680 } | 683 } |
681 | |
682 | 684 |
683 unsigned totalVectors=0; | 685 unsigned totalVectors=0; |
684 char *thisKey = new char[MAXSTR]; | 686 char *thisKey = new char[MAXSTR]; |
685 char *thisFile = new char[MAXSTR]; | 687 char *thisFile = new char[MAXSTR]; |
686 char *thisTimesFileName = new char[MAXSTR]; | 688 char *thisTimesFileName = new char[MAXSTR]; |
709 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, | 711 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, |
710 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) | 712 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) |
711 error("mmap error for batchinsert into database", "", "mmap"); | 713 error("mmap error for batchinsert into database", "", "mmap"); |
712 | 714 |
713 // Make some handy tables with correct types | 715 // Make some handy tables with correct types |
714 fileTable= (char*)(db+fileTableOffset); | 716 fileTable= (char*)(db+dbH->fileTableOffset); |
715 trackTable = (unsigned*)(db+trackTableOffset); | 717 trackTable = (unsigned*)(db+dbH->trackTableOffset); |
716 dataBuf = (double*)(db+dataoffset); | 718 dataBuf = (double*)(db+dbH->dataOffset); |
717 l2normTable = (double*)(db+l2normTableOffset); | 719 l2normTable = (double*)(db+dbH->l2normTableOffset); |
718 timesTable = (double*)(db+timesTableOffset); | 720 timesTable = (double*)(db+dbH->timesTableOffset); |
719 | 721 |
720 // Check that there is room for at least 1 more file | 722 // Check that there is room for at least 1 more file |
721 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) | 723 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) |
722 error("No more room in database","insert failed: reason database is full."); | 724 error("No more room in database","insert failed: reason database is full."); |
723 | 725 |
793 // Update track to file index map | 795 // Update track to file index map |
794 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); | 796 //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); |
795 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); | 797 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); |
796 | 798 |
797 // Update the feature database | 799 // Update the feature database |
798 memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); | 800 memcpy (db+dbH->dataOffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); |
799 | 801 |
800 // Norm the vectors on input if the database is already L2 normed | 802 // Norm the vectors on input if the database is already L2 normed |
801 if(dbH->flags & O2_FLAG_L2NORM) | 803 if(dbH->flags & O2_FLAG_L2NORM) |
802 unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append | 804 unitNormAndInsertL2((double*)(db+dbH->dataOffset+insertoffset), dbH->dim, numVectors, 1); // append |
803 | 805 |
804 totalVectors+=numVectors; | 806 totalVectors+=numVectors; |
805 } | 807 } |
806 } | 808 } |
807 // CLEAN UP | 809 // CLEAN UP |
893 // Update Header information | 895 // Update Header information |
894 cout << "num files:" << dbH->numFiles << endl; | 896 cout << "num files:" << dbH->numFiles << endl; |
895 cout << "data dim:" << dbH->dim <<endl; | 897 cout << "data dim:" << dbH->dim <<endl; |
896 if(dbH->dim>0){ | 898 if(dbH->dim>0){ |
897 cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl; | 899 cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl; |
898 cout << "vectors available:" << (timesTableOffset-(dataoffset+dbH->length))/(sizeof(double)*dbH->dim) << endl; | 900 cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << endl; |
899 } | 901 } |
900 cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(timesTableOffset-dataoffset) << "%)" << endl; | 902 cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; |
901 cout << "bytes available:" << timesTableOffset-(dataoffset+dbH->length) << " (" << | 903 cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << |
902 (100.0*(timesTableOffset-(dataoffset+dbH->length)))/(timesTableOffset-dataoffset) << "%)" << endl; | 904 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; |
903 cout << "flags:" << dbH->flags << endl; | 905 cout << "flags:" << dbH->flags << endl; |
904 | 906 |
905 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; | 907 cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; |
906 } else { | 908 } else { |
907 adbStatusResult->numFiles = dbH->numFiles; | 909 adbStatusResult->numFiles = dbH->numFiles; |