Mercurial > hg > audiodb
changeset 120:fce73e4afa15
Make a start on refactoring: move the DB header initialization into its
own function, used from both initTables and batchinsert.
This actually removes a mmap()/munmap() pair from inside the batchinsert
loop over files. I am not really sure why that pair was there in the first
place; maybe to sync the header to disk more forcefully? In any case, no
change in behaviour should result from this code change.
author | mas01cr |
---|---|
date | Tue, 16 Oct 2007 11:47:51 +0000 |
parents | 942e9ab50e9c |
children | 90eab30d2f79 |
files | audioDB.cpp audioDB.h |
diffstat | 2 files changed, 43 insertions(+), 103 deletions(-) [+] |
line wrap: on
line diff
--- a/audioDB.cpp Fri Oct 12 09:16:43 2007 +0000 +++ b/audioDB.cpp Tue Oct 16 11:47:51 2007 +0000 @@ -314,48 +314,6 @@ return -1; // no command found } -/* Make a new database - - The database consists of: - - header - --------------------------------------------------------------------------------- - | magic 4 bytes| numFiles 4 bytes | dim 4 bytes | length 4 bytes |flags 4 bytes | - --------------------------------------------------------------------------------- - - - keyTable : list of keys of tracks - -------------------------------------------------------------------------- - | key 256 bytes | - -------------------------------------------------------------------------- - O2_MAXFILES*O2_FILENAMELENGTH - - trackTable : Maps implicit feature index to a feature vector matrix - -------------------------------------------------------------------------- - | numVectors (4 bytes) | - -------------------------------------------------------------------------- - O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(INT) - - featureTable - -------------------------------------------------------------------------- - | v1 v2 v3 ... vd (double) | - -------------------------------------------------------------------------- - O2_MAXFILES * O2_MEANNUMFEATURES * DIM * sizeof(DOUBLE) - - timesTable - -------------------------------------------------------------------------- - | timestamp (double) | - -------------------------------------------------------------------------- - O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE) - - l2normTable - -------------------------------------------------------------------------- - | nm (double) | - -------------------------------------------------------------------------- - O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE) - -*/ - void audioDB::get_lock(int fd, bool exclusive) { struct flock lock; int status; @@ -395,6 +353,18 @@ error("fcntl unlock error", "", "fcntl"); } +/* Make a new database. 
+ + The database consists of: + + * a header (see dbTableHeader struct definition); + * keyTable: list of keys of tracks; + * trackTable: Maps implicit feature index to a feature vector + matrix (sizes of tracks) + * featureTable: Lots of doubles; + * timesTable: time points for each feature vector; + * l2normTable: squared l2norms for each feature vector. +*/ void audioDB::create(const char* dbName){ if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) error("Can't create database file", dbName, "open"); @@ -443,22 +413,12 @@ // FIXME: drop something? Should we even allow this? } -// initTables - memory map files passed as arguments -// Precondition: database has already been created -void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) { +void audioDB::initDBHeader(const char* dbName, bool forWrite) { if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) { error("Can't open database file", dbName, "open"); } + get_lock(dbfid, forWrite); - - // open the input file - if (inFile && (infid = open(inFile, O_RDONLY)) < 0) { - error("can't open input file for reading", inFile, "open"); - } - // find size of input file - if (inFile && fstat(infid, &statbuf) < 0) { - error("fstat error finding size of input", inFile, "fstat"); - } // Get the database header info dbH = new dbTableHeaderT(); assert(dbH); @@ -482,6 +442,31 @@ error("database file has incorect version", dbName); } + // mmap the database file + if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? 
PROT_WRITE : 0), + MAP_SHARED, dbfid, 0)) == (caddr_t) -1) + error("mmap error for initting tables of database", "", "mmap"); + + // Make some handy tables with correct types + fileTable = (char *) (db + dbH->fileTableOffset); + trackTable = (unsigned *) (db + dbH->trackTableOffset); + dataBuf = (double *) (db + dbH->dataOffset); + l2normTable = (double *) (db + dbH->l2normTableOffset); + timesTable = (double *) (db + dbH->timesTableOffset); +} + +void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) { + + initDBHeader(dbName, forWrite); + + // open the input file + if (inFile && (infid = open(inFile, O_RDONLY)) < 0) { + error("can't open input file for reading", inFile, "open"); + } + // find size of input file + if (inFile && fstat(infid, &statbuf) < 0) { + error("fstat error finding size of input", inFile, "fstat"); + } if(inFile) if(dbH->dim == 0 && dbH->length == 0) // empty database // initialize with input dimensionality @@ -499,18 +484,6 @@ if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) == (caddr_t) -1) error("mmap error for input", inFile, "mmap"); - - // mmap the database file - if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? 
PROT_WRITE : 0), - MAP_SHARED, dbfid, 0)) == (caddr_t) -1) - error("mmap error for initting tables of database", "", "mmap"); - - // Make some handy tables with correct types - fileTable= (char*)(db+dbH->fileTableOffset); - trackTable = (unsigned*)(db+dbH->trackTableOffset); - dataBuf = (double*)(db+dbH->dataOffset); - l2normTable = (double*)(db+dbH->l2normTableOffset); - timesTable = (double*)(db+dbH->timesTableOffset); } void audioDB::insert(const char* dbName, const char* inFile){ @@ -649,11 +622,9 @@ } } -void audioDB::batchinsert(const char* dbName, const char* inFile){ +void audioDB::batchinsert(const char* dbName, const char* inFile) { - if ((dbfid = open (dbName, O_RDWR)) < 0) - error("Can't open database file", dbName, "open"); - get_lock(dbfid, 1); + initDBHeader(dbName, true); if(!key) key=inFile; @@ -667,26 +638,14 @@ if(!(keysIn = new ifstream(key))) error("Could not open batch key file",key); - // Get the database header info - dbH = new dbTableHeaderT(); - assert(dbH); - - if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) - error("error reading db header"); - if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) error("Must use timestamps with timestamped database","use --times"); - if(dbH->magic!=O2_MAGIC){ - cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl; - error("database file has incorrect header",dbName); - } - unsigned totalVectors=0; char *thisKey = new char[MAXSTR]; char *thisFile = new char[MAXSTR]; char *thisTimesFileName = new char[MAXSTR]; - + do{ filesIn->getline(thisFile,MAXSTR); if(key && key!=inFile) @@ -707,18 +666,6 @@ if (thisFile && fstat (infid,&statbuf) < 0) error("fstat error finding size of input", "", "fstat"); - // mmap the database file - if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, - MAP_SHARED, dbfid, 0)) == (caddr_t) -1) - error("mmap error for batchinsert into database", "", "mmap"); - - // Make some handy tables with correct types - fileTable= 
(char*)(db+dbH->fileTableOffset); - trackTable = (unsigned*)(db+dbH->trackTableOffset); - dataBuf = (double*)(db+dbH->dataOffset); - l2normTable = (double*)(db+dbH->l2normTableOffset); - timesTable = (double*)(db+dbH->timesTableOffset); - // Check that there is room for at least 1 more file if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) error("No more room in database","insert failed: reason database is full."); @@ -809,14 +756,8 @@ // CLEAN UP munmap(indata,statbuf.st_size); close(infid); - munmap(db,O2_DEFAULTDBSIZE); }while(!filesIn->eof()); - // mmap the database file - if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, - MAP_SHARED, dbfid, 0)) == (caddr_t) -1) - error("mmap error for creating database", "", "mmap"); - if(verbosity) { cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl; @@ -824,8 +765,6 @@ // Report status status(dbName); - - munmap(db,O2_DEFAULTDBSIZE); } // FIXME: this can't propagate the sequence length argument (used for
--- a/audioDB.h Fri Oct 12 09:16:43 2007 +0000 +++ b/audioDB.h Tue Oct 16 11:47:51 2007 +0000 @@ -164,6 +164,7 @@ void trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); void trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); + void initDBHeader(const char *dbName, bool forWrite); void initTables(const char* dbName, bool forWrite, const char* inFile); void unitNorm(double* X, unsigned d, unsigned n, double* qNorm); void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);