comparison audioDB.cpp @ 120:fce73e4afa15

Make a start on refactoring: move the DB header initialization into its own function, initDBHeader, which is called from both initTables and batchinsert. This also removes an mmap()/munmap() pair from inside the batchinsert loop over files. I am not sure why that pair was there in the first place; perhaps it was meant to sync the header to disk more forcefully. In any case, this change should not alter behaviour.
author mas01cr
date Tue, 16 Oct 2007 11:47:51 +0000
parents bc141fd1dc41
children 90eab30d2f79
comparison
equal deleted inserted replaced
114:942e9ab50e9c 120:fce73e4afa15
312 return 0; 312 return 0;
313 } 313 }
314 return -1; // no command found 314 return -1; // no command found
315 } 315 }
316 316
317 /* Make a new database
318
319 The database consists of:
320
321 header
322 ---------------------------------------------------------------------------------
323 | magic 4 bytes| numFiles 4 bytes | dim 4 bytes | length 4 bytes |flags 4 bytes |
324 ---------------------------------------------------------------------------------
325
326
327 keyTable : list of keys of tracks
328 --------------------------------------------------------------------------
329 | key 256 bytes |
330 --------------------------------------------------------------------------
331 O2_MAXFILES*O2_FILENAMELENGTH
332
333 trackTable : Maps implicit feature index to a feature vector matrix
334 --------------------------------------------------------------------------
335 | numVectors (4 bytes) |
336 --------------------------------------------------------------------------
337 O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(INT)
338
339 featureTable
340 --------------------------------------------------------------------------
341 | v1 v2 v3 ... vd (double) |
342 --------------------------------------------------------------------------
343 O2_MAXFILES * O2_MEANNUMFEATURES * DIM * sizeof(DOUBLE)
344
345 timesTable
346 --------------------------------------------------------------------------
347 | timestamp (double) |
348 --------------------------------------------------------------------------
349 O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE)
350
351 l2normTable
352 --------------------------------------------------------------------------
353 | nm (double) |
354 --------------------------------------------------------------------------
355 O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE)
356
357 */
358
359 void audioDB::get_lock(int fd, bool exclusive) { 317 void audioDB::get_lock(int fd, bool exclusive) {
360 struct flock lock; 318 struct flock lock;
361 int status; 319 int status;
362 320
363 lock.l_type = exclusive ? F_WRLCK : F_RDLCK; 321 lock.l_type = exclusive ? F_WRLCK : F_RDLCK;
393 351
394 if (status) 352 if (status)
395 error("fcntl unlock error", "", "fcntl"); 353 error("fcntl unlock error", "", "fcntl");
396 } 354 }
397 355
356 /* Make a new database.
357
358 The database consists of:
359
360 * a header (see dbTableHeader struct definition);
361 * keyTable: list of keys of tracks;
362 * trackTable: Maps implicit feature index to a feature vector
363 matrix (sizes of tracks)
364 * featureTable: Lots of doubles;
365 * timesTable: time points for each feature vector;
366 * l2normTable: squared l2norms for each feature vector.
367 */
398 void audioDB::create(const char* dbName){ 368 void audioDB::create(const char* dbName){
399 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) 369 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0)
400 error("Can't create database file", dbName, "open"); 370 error("Can't create database file", dbName, "open");
401 get_lock(dbfid, 1); 371 get_lock(dbfid, 1);
402 372
441 411
442 void audioDB::drop(){ 412 void audioDB::drop(){
443 // FIXME: drop something? Should we even allow this? 413 // FIXME: drop something? Should we even allow this?
444 } 414 }
445 415
446 // initTables - memory map files passed as arguments 416 void audioDB::initDBHeader(const char* dbName, bool forWrite) {
447 // Precondition: database has already been created
448 void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) {
449 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) { 417 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) {
450 error("Can't open database file", dbName, "open"); 418 error("Can't open database file", dbName, "open");
451 } 419 }
420
452 get_lock(dbfid, forWrite); 421 get_lock(dbfid, forWrite);
453
454 // open the input file
455 if (inFile && (infid = open(inFile, O_RDONLY)) < 0) {
456 error("can't open input file for reading", inFile, "open");
457 }
458 // find size of input file
459 if (inFile && fstat(infid, &statbuf) < 0) {
460 error("fstat error finding size of input", inFile, "fstat");
461 }
462 // Get the database header info 422 // Get the database header info
463 dbH = new dbTableHeaderT(); 423 dbH = new dbTableHeaderT();
464 assert(dbH); 424 assert(dbH);
465 425
466 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) { 426 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) {
480 440
481 if(dbH->version != O2_FORMAT_VERSION) { 441 if(dbH->version != O2_FORMAT_VERSION) {
482 error("database file has incorect version", dbName); 442 error("database file has incorect version", dbName);
483 } 443 }
484 444
445 // mmap the database file
446 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0),
447 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
448 error("mmap error for initting tables of database", "", "mmap");
449
450 // Make some handy tables with correct types
451 fileTable = (char *) (db + dbH->fileTableOffset);
452 trackTable = (unsigned *) (db + dbH->trackTableOffset);
453 dataBuf = (double *) (db + dbH->dataOffset);
454 l2normTable = (double *) (db + dbH->l2normTableOffset);
455 timesTable = (double *) (db + dbH->timesTableOffset);
456 }
457
458 void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) {
459
460 initDBHeader(dbName, forWrite);
461
462 // open the input file
463 if (inFile && (infid = open(inFile, O_RDONLY)) < 0) {
464 error("can't open input file for reading", inFile, "open");
465 }
466 // find size of input file
467 if (inFile && fstat(infid, &statbuf) < 0) {
468 error("fstat error finding size of input", inFile, "fstat");
469 }
485 if(inFile) 470 if(inFile)
486 if(dbH->dim == 0 && dbH->length == 0) // empty database 471 if(dbH->dim == 0 && dbH->length == 0) // empty database
487 // initialize with input dimensionality 472 // initialize with input dimensionality
488 read(infid, &dbH->dim, sizeof(unsigned)); 473 read(infid, &dbH->dim, sizeof(unsigned));
489 else { 474 else {
497 482
498 // mmap the input file 483 // mmap the input file
499 if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) 484 if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0))
500 == (caddr_t) -1) 485 == (caddr_t) -1)
501 error("mmap error for input", inFile, "mmap"); 486 error("mmap error for input", inFile, "mmap");
502
503 // mmap the database file
504 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0),
505 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
506 error("mmap error for initting tables of database", "", "mmap");
507
508 // Make some handy tables with correct types
509 fileTable= (char*)(db+dbH->fileTableOffset);
510 trackTable = (unsigned*)(db+dbH->trackTableOffset);
511 dataBuf = (double*)(db+dbH->dataOffset);
512 l2normTable = (double*)(db+dbH->l2normTableOffset);
513 timesTable = (double*)(db+dbH->timesTableOffset);
514 } 487 }
515 488
516 void audioDB::insert(const char* dbName, const char* inFile){ 489 void audioDB::insert(const char* dbName, const char* inFile){
517 490
518 initTables(dbName, 1, inFile); 491 initTables(dbName, 1, inFile);
647 } 620 }
648 } 621 }
649 } 622 }
650 } 623 }
651 624
652 void audioDB::batchinsert(const char* dbName, const char* inFile){ 625 void audioDB::batchinsert(const char* dbName, const char* inFile) {
653 626
654 if ((dbfid = open (dbName, O_RDWR)) < 0) 627 initDBHeader(dbName, true);
655 error("Can't open database file", dbName, "open");
656 get_lock(dbfid, 1);
657 628
658 if(!key) 629 if(!key)
659 key=inFile; 630 key=inFile;
660 ifstream *filesIn = 0; 631 ifstream *filesIn = 0;
661 ifstream *keysIn = 0; 632 ifstream *keysIn = 0;
665 error("Could not open batch in file", inFile); 636 error("Could not open batch in file", inFile);
666 if(key && key!=inFile) 637 if(key && key!=inFile)
667 if(!(keysIn = new ifstream(key))) 638 if(!(keysIn = new ifstream(key)))
668 error("Could not open batch key file",key); 639 error("Could not open batch key file",key);
669 640
670 // Get the database header info
671 dbH = new dbTableHeaderT();
672 assert(dbH);
673
674 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT))
675 error("error reading db header");
676
677 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 641 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
678 error("Must use timestamps with timestamped database","use --times"); 642 error("Must use timestamps with timestamped database","use --times");
679 643
680 if(dbH->magic!=O2_MAGIC){
681 cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl;
682 error("database file has incorrect header",dbName);
683 }
684
685 unsigned totalVectors=0; 644 unsigned totalVectors=0;
686 char *thisKey = new char[MAXSTR]; 645 char *thisKey = new char[MAXSTR];
687 char *thisFile = new char[MAXSTR]; 646 char *thisFile = new char[MAXSTR];
688 char *thisTimesFileName = new char[MAXSTR]; 647 char *thisTimesFileName = new char[MAXSTR];
689 648
690 do{ 649 do{
691 filesIn->getline(thisFile,MAXSTR); 650 filesIn->getline(thisFile,MAXSTR);
692 if(key && key!=inFile) 651 if(key && key!=inFile)
693 keysIn->getline(thisKey,MAXSTR); 652 keysIn->getline(thisKey,MAXSTR);
694 else 653 else
704 error("can't open feature file for reading", thisFile, "open"); 663 error("can't open feature file for reading", thisFile, "open");
705 664
706 // find size of input file 665 // find size of input file
707 if (thisFile && fstat (infid,&statbuf) < 0) 666 if (thisFile && fstat (infid,&statbuf) < 0)
708 error("fstat error finding size of input", "", "fstat"); 667 error("fstat error finding size of input", "", "fstat");
709
710 // mmap the database file
711 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
712 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
713 error("mmap error for batchinsert into database", "", "mmap");
714
715 // Make some handy tables with correct types
716 fileTable= (char*)(db+dbH->fileTableOffset);
717 trackTable = (unsigned*)(db+dbH->trackTableOffset);
718 dataBuf = (double*)(db+dbH->dataOffset);
719 l2normTable = (double*)(db+dbH->l2normTableOffset);
720 timesTable = (double*)(db+dbH->timesTableOffset);
721 668
722 // Check that there is room for at least 1 more file 669 // Check that there is room for at least 1 more file
723 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) 670 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int))))
724 error("No more room in database","insert failed: reason database is full."); 671 error("No more room in database","insert failed: reason database is full.");
725 672
807 } 754 }
808 } 755 }
809 // CLEAN UP 756 // CLEAN UP
810 munmap(indata,statbuf.st_size); 757 munmap(indata,statbuf.st_size);
811 close(infid); 758 close(infid);
812 munmap(db,O2_DEFAULTDBSIZE);
813 }while(!filesIn->eof()); 759 }while(!filesIn->eof());
814 760
815 // mmap the database file
816 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
817 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
818 error("mmap error for creating database", "", "mmap");
819
820 if(verbosity) { 761 if(verbosity) {
821 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " 762 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors "
822 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl; 763 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl;
823 } 764 }
824 765
825 // Report status 766 // Report status
826 status(dbName); 767 status(dbName);
827
828 munmap(db,O2_DEFAULTDBSIZE);
829 } 768 }
830 769
831 // FIXME: this can't propagate the sequence length argument (used for 770 // FIXME: this can't propagate the sequence length argument (used for
832 // dudCount). See adb__status() definition for the other half of 771 // dudCount). See adb__status() definition for the other half of
833 // this. -- CSR, 2007-10-01 772 // this. -- CSR, 2007-10-01