Mercurial > hg > audiodb
comparison audioDB.cpp @ 120:fce73e4afa15
Make a start on refactoring: work the DB header initialization into its
own function, used from both initTables and batchinsert.
This actually removes an mmap()/munmap() pair from inside the batchinsert
loop over files. Not really sure why that was there in the first place;
maybe to sync the header to disk more forcefully? In any case, no
change in behaviour should happen as a result of this code change.
author | mas01cr |
---|---|
date | Tue, 16 Oct 2007 11:47:51 +0000 |
parents | bc141fd1dc41 |
children | 90eab30d2f79 |
comparison
equal
deleted
inserted
replaced
114:942e9ab50e9c | 120:fce73e4afa15 |
---|---|
312 return 0; | 312 return 0; |
313 } | 313 } |
314 return -1; // no command found | 314 return -1; // no command found |
315 } | 315 } |
316 | 316 |
317 /* Make a new database | |
318 | |
319 The database consists of: | |
320 | |
321 header | |
322 --------------------------------------------------------------------------------- | |
323 | magic 4 bytes| numFiles 4 bytes | dim 4 bytes | length 4 bytes |flags 4 bytes | | |
324 --------------------------------------------------------------------------------- | |
325 | |
326 | |
327 keyTable : list of keys of tracks | |
328 -------------------------------------------------------------------------- | |
329 | key 256 bytes | | |
330 -------------------------------------------------------------------------- | |
331 O2_MAXFILES*O2_FILENAMELENGTH | |
332 | |
333 trackTable : Maps implicit feature index to a feature vector matrix | |
334 -------------------------------------------------------------------------- | |
335 | numVectors (4 bytes) | | |
336 -------------------------------------------------------------------------- | |
337 O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(INT) | |
338 | |
339 featureTable | |
340 -------------------------------------------------------------------------- | |
341 | v1 v2 v3 ... vd (double) | | |
342 -------------------------------------------------------------------------- | |
343 O2_MAXFILES * O2_MEANNUMFEATURES * DIM * sizeof(DOUBLE) | |
344 | |
345 timesTable | |
346 -------------------------------------------------------------------------- | |
347 | timestamp (double) | | |
348 -------------------------------------------------------------------------- | |
349 O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE) | |
350 | |
351 l2normTable | |
352 -------------------------------------------------------------------------- | |
353 | nm (double) | | |
354 -------------------------------------------------------------------------- | |
355 O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE) | |
356 | |
357 */ | |
358 | |
359 void audioDB::get_lock(int fd, bool exclusive) { | 317 void audioDB::get_lock(int fd, bool exclusive) { |
360 struct flock lock; | 318 struct flock lock; |
361 int status; | 319 int status; |
362 | 320 |
363 lock.l_type = exclusive ? F_WRLCK : F_RDLCK; | 321 lock.l_type = exclusive ? F_WRLCK : F_RDLCK; |
393 | 351 |
394 if (status) | 352 if (status) |
395 error("fcntl unlock error", "", "fcntl"); | 353 error("fcntl unlock error", "", "fcntl"); |
396 } | 354 } |
397 | 355 |
356 /* Make a new database. | |
357 | |
358 The database consists of: | |
359 | |
360 * a header (see dbTableHeader struct definition); | |
361 * keyTable: list of keys of tracks; | |
362 * trackTable: Maps implicit feature index to a feature vector | |
363 matrix (sizes of tracks) | |
364 * featureTable: Lots of doubles; | |
365 * timesTable: time points for each feature vector; | |
366 * l2normTable: squared l2norms for each feature vector. | |
367 */ | |
398 void audioDB::create(const char* dbName){ | 368 void audioDB::create(const char* dbName){ |
399 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) | 369 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) |
400 error("Can't create database file", dbName, "open"); | 370 error("Can't create database file", dbName, "open"); |
401 get_lock(dbfid, 1); | 371 get_lock(dbfid, 1); |
402 | 372 |
441 | 411 |
442 void audioDB::drop(){ | 412 void audioDB::drop(){ |
443 // FIXME: drop something? Should we even allow this? | 413 // FIXME: drop something? Should we even allow this? |
444 } | 414 } |
445 | 415 |
446 // initTables - memory map files passed as arguments | 416 void audioDB::initDBHeader(const char* dbName, bool forWrite) { |
447 // Precondition: database has already been created | |
448 void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) { | |
449 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) { | 417 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) { |
450 error("Can't open database file", dbName, "open"); | 418 error("Can't open database file", dbName, "open"); |
451 } | 419 } |
420 | |
452 get_lock(dbfid, forWrite); | 421 get_lock(dbfid, forWrite); |
453 | |
454 // open the input file | |
455 if (inFile && (infid = open(inFile, O_RDONLY)) < 0) { | |
456 error("can't open input file for reading", inFile, "open"); | |
457 } | |
458 // find size of input file | |
459 if (inFile && fstat(infid, &statbuf) < 0) { | |
460 error("fstat error finding size of input", inFile, "fstat"); | |
461 } | |
462 // Get the database header info | 422 // Get the database header info |
463 dbH = new dbTableHeaderT(); | 423 dbH = new dbTableHeaderT(); |
464 assert(dbH); | 424 assert(dbH); |
465 | 425 |
466 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) { | 426 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) { |
480 | 440 |
481 if(dbH->version != O2_FORMAT_VERSION) { | 441 if(dbH->version != O2_FORMAT_VERSION) { |
482 error("database file has incorect version", dbName); | 442 error("database file has incorect version", dbName); |
483 } | 443 } |
484 | 444 |
445 // mmap the database file | |
446 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0), | |
447 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) | |
448 error("mmap error for initting tables of database", "", "mmap"); | |
449 | |
450 // Make some handy tables with correct types | |
451 fileTable = (char *) (db + dbH->fileTableOffset); | |
452 trackTable = (unsigned *) (db + dbH->trackTableOffset); | |
453 dataBuf = (double *) (db + dbH->dataOffset); | |
454 l2normTable = (double *) (db + dbH->l2normTableOffset); | |
455 timesTable = (double *) (db + dbH->timesTableOffset); | |
456 } | |
457 | |
458 void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) { | |
459 | |
460 initDBHeader(dbName, forWrite); | |
461 | |
462 // open the input file | |
463 if (inFile && (infid = open(inFile, O_RDONLY)) < 0) { | |
464 error("can't open input file for reading", inFile, "open"); | |
465 } | |
466 // find size of input file | |
467 if (inFile && fstat(infid, &statbuf) < 0) { | |
468 error("fstat error finding size of input", inFile, "fstat"); | |
469 } | |
485 if(inFile) | 470 if(inFile) |
486 if(dbH->dim == 0 && dbH->length == 0) // empty database | 471 if(dbH->dim == 0 && dbH->length == 0) // empty database |
487 // initialize with input dimensionality | 472 // initialize with input dimensionality |
488 read(infid, &dbH->dim, sizeof(unsigned)); | 473 read(infid, &dbH->dim, sizeof(unsigned)); |
489 else { | 474 else { |
497 | 482 |
498 // mmap the input file | 483 // mmap the input file |
499 if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) | 484 if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) |
500 == (caddr_t) -1) | 485 == (caddr_t) -1) |
501 error("mmap error for input", inFile, "mmap"); | 486 error("mmap error for input", inFile, "mmap"); |
502 | |
503 // mmap the database file | |
504 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0), | |
505 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) | |
506 error("mmap error for initting tables of database", "", "mmap"); | |
507 | |
508 // Make some handy tables with correct types | |
509 fileTable= (char*)(db+dbH->fileTableOffset); | |
510 trackTable = (unsigned*)(db+dbH->trackTableOffset); | |
511 dataBuf = (double*)(db+dbH->dataOffset); | |
512 l2normTable = (double*)(db+dbH->l2normTableOffset); | |
513 timesTable = (double*)(db+dbH->timesTableOffset); | |
514 } | 487 } |
515 | 488 |
516 void audioDB::insert(const char* dbName, const char* inFile){ | 489 void audioDB::insert(const char* dbName, const char* inFile){ |
517 | 490 |
518 initTables(dbName, 1, inFile); | 491 initTables(dbName, 1, inFile); |
647 } | 620 } |
648 } | 621 } |
649 } | 622 } |
650 } | 623 } |
651 | 624 |
652 void audioDB::batchinsert(const char* dbName, const char* inFile){ | 625 void audioDB::batchinsert(const char* dbName, const char* inFile) { |
653 | 626 |
654 if ((dbfid = open (dbName, O_RDWR)) < 0) | 627 initDBHeader(dbName, true); |
655 error("Can't open database file", dbName, "open"); | |
656 get_lock(dbfid, 1); | |
657 | 628 |
658 if(!key) | 629 if(!key) |
659 key=inFile; | 630 key=inFile; |
660 ifstream *filesIn = 0; | 631 ifstream *filesIn = 0; |
661 ifstream *keysIn = 0; | 632 ifstream *keysIn = 0; |
665 error("Could not open batch in file", inFile); | 636 error("Could not open batch in file", inFile); |
666 if(key && key!=inFile) | 637 if(key && key!=inFile) |
667 if(!(keysIn = new ifstream(key))) | 638 if(!(keysIn = new ifstream(key))) |
668 error("Could not open batch key file",key); | 639 error("Could not open batch key file",key); |
669 | 640 |
670 // Get the database header info | |
671 dbH = new dbTableHeaderT(); | |
672 assert(dbH); | |
673 | |
674 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) | |
675 error("error reading db header"); | |
676 | |
677 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | 641 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) |
678 error("Must use timestamps with timestamped database","use --times"); | 642 error("Must use timestamps with timestamped database","use --times"); |
679 | 643 |
680 if(dbH->magic!=O2_MAGIC){ | |
681 cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl; | |
682 error("database file has incorrect header",dbName); | |
683 } | |
684 | |
685 unsigned totalVectors=0; | 644 unsigned totalVectors=0; |
686 char *thisKey = new char[MAXSTR]; | 645 char *thisKey = new char[MAXSTR]; |
687 char *thisFile = new char[MAXSTR]; | 646 char *thisFile = new char[MAXSTR]; |
688 char *thisTimesFileName = new char[MAXSTR]; | 647 char *thisTimesFileName = new char[MAXSTR]; |
689 | 648 |
690 do{ | 649 do{ |
691 filesIn->getline(thisFile,MAXSTR); | 650 filesIn->getline(thisFile,MAXSTR); |
692 if(key && key!=inFile) | 651 if(key && key!=inFile) |
693 keysIn->getline(thisKey,MAXSTR); | 652 keysIn->getline(thisKey,MAXSTR); |
694 else | 653 else |
704 error("can't open feature file for reading", thisFile, "open"); | 663 error("can't open feature file for reading", thisFile, "open"); |
705 | 664 |
706 // find size of input file | 665 // find size of input file |
707 if (thisFile && fstat (infid,&statbuf) < 0) | 666 if (thisFile && fstat (infid,&statbuf) < 0) |
708 error("fstat error finding size of input", "", "fstat"); | 667 error("fstat error finding size of input", "", "fstat"); |
709 | |
710 // mmap the database file | |
711 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, | |
712 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) | |
713 error("mmap error for batchinsert into database", "", "mmap"); | |
714 | |
715 // Make some handy tables with correct types | |
716 fileTable= (char*)(db+dbH->fileTableOffset); | |
717 trackTable = (unsigned*)(db+dbH->trackTableOffset); | |
718 dataBuf = (double*)(db+dbH->dataOffset); | |
719 l2normTable = (double*)(db+dbH->l2normTableOffset); | |
720 timesTable = (double*)(db+dbH->timesTableOffset); | |
721 | 668 |
722 // Check that there is room for at least 1 more file | 669 // Check that there is room for at least 1 more file |
723 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) | 670 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) |
724 error("No more room in database","insert failed: reason database is full."); | 671 error("No more room in database","insert failed: reason database is full."); |
725 | 672 |
807 } | 754 } |
808 } | 755 } |
809 // CLEAN UP | 756 // CLEAN UP |
810 munmap(indata,statbuf.st_size); | 757 munmap(indata,statbuf.st_size); |
811 close(infid); | 758 close(infid); |
812 munmap(db,O2_DEFAULTDBSIZE); | |
813 }while(!filesIn->eof()); | 759 }while(!filesIn->eof()); |
814 | 760 |
815 // mmap the database file | |
816 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, | |
817 MAP_SHARED, dbfid, 0)) == (caddr_t) -1) | |
818 error("mmap error for creating database", "", "mmap"); | |
819 | |
820 if(verbosity) { | 761 if(verbosity) { |
821 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " | 762 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " |
822 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl; | 763 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl; |
823 } | 764 } |
824 | 765 |
825 // Report status | 766 // Report status |
826 status(dbName); | 767 status(dbName); |
827 | |
828 munmap(db,O2_DEFAULTDBSIZE); | |
829 } | 768 } |
830 | 769 |
831 // FIXME: this can't propagate the sequence length argument (used for | 770 // FIXME: this can't propagate the sequence length argument (used for |
832 // dudCount). See adb__status() definition for the other half of | 771 // dudCount). See adb__status() definition for the other half of |
833 // this. -- CSR, 2007-10-01 | 772 // this. -- CSR, 2007-10-01 |