Mercurial > hg > audiodb
changeset 15:69eb22e09772 audiodb-debian
Merged trunk changes -r14:29 onto audiodb-debian branch
author | mas01cr |
---|---|
date | Thu, 02 Aug 2007 11:34:27 +0000 |
parents | c533e9e67374 |
children | 4346f4b717e4 |
files | TODO.txt audioDB.cpp audioDB.h docs/TODO.txt gengetopt.in |
diffstat | 5 files changed, 54 insertions(+), 36 deletions(-) [+] |
line wrap: on
line diff
--- a/TODO.txt Tue Jul 24 12:04:15 2007 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-
-audioDB FIXME:
-
-command line processing: need better syntax for commands
-interactive shell processing: make a query shell
-
-zero-length input: do something sensible with null files
-
-
--- a/audioDB.cpp Tue Jul 24 12:04:15 2007 +0000 +++ b/audioDB.cpp Thu Aug 02 11:34:27 2007 +0000 @@ -282,7 +282,7 @@ } return 0; } - + if(args_info.BATCHINSERT_given){ command=COM_BATCHINSERT; dbName=args_info.database_arg; @@ -416,8 +416,8 @@ */ void audioDB::create(const char* dbName){ - if ((dbfid = open (dbName, O_RDWR | O_CREAT | O_TRUNC)) < 0) - error("Can't open database file:", dbName); + if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) + error("Can't open database file", dbName); // go to the location corresponding to the last byte if (lseek (dbfid, O2_DEFAULTDBSIZE - 1, SEEK_SET) == -1) @@ -464,7 +464,7 @@ // open the input file if (inFile && (infid = open (inFile, O_RDONLY)) < 0) - error("can't open feature file for reading", inFile); + error("can't open input file for reading", inFile); // find size of input file if (inFile && fstat (infid,&statbuf) < 0) @@ -690,18 +690,6 @@ } - // mmap the database file - if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, - MAP_SHARED, dbfid, 0)) == (caddr_t) -1) - error("mmap error for creating database"); - - // Make some handy tables with correct types - fileTable= (char*)(db+fileTableOffset); - segTable = (unsigned*)(db+segTableOffset); - dataBuf = (double*)(db+dataoffset); - l2normTable = (double*)(db+l2normTableOffset); - timesTable = (double*)(db+timesTableOffset); - unsigned totalVectors=0; char *thisKey = new char[MAXSTR]; char *thisFile = new char[MAXSTR]; @@ -727,6 +715,18 @@ if (thisFile && fstat (infid,&statbuf) < 0) error("fstat error finding size of input"); + // mmap the database file + if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, dbfid, 0)) == (caddr_t) -1) + error("mmap error for creating database"); + + // Make some handy tables with correct types + fileTable= (char*)(db+fileTableOffset); + segTable = (unsigned*)(db+segTableOffset); + dataBuf = (double*)(db+dataoffset); + l2normTable = 
(double*)(db+l2normTableOffset); + timesTable = (double*)(db+timesTableOffset); + // Check that there is room for at least 1 more file if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) error("No more room in database","insert failed: reason database is full."); @@ -815,7 +815,13 @@ // CLEAN UP munmap(indata,statbuf.st_size); close(infid); + munmap(db,O2_DEFAULTDBSIZE); }while(!filesIn->eof()); + + // mmap the database file + if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, dbfid, 0)) == (caddr_t) -1) + error("mmap error for creating database"); if(verbosity) cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " @@ -823,7 +829,8 @@ // Report status status(dbName); - + + munmap(db,O2_DEFAULTDBSIZE); } void audioDB::ws_status(const char*dbName, char* hostport){ @@ -1227,10 +1234,9 @@ gettimeofday(&tv1, NULL); - for(seg=0 ; seg < dbH->numFiles ; seg++, processedSegs++){ + for(processedSegs=0, seg=0 ; processedSegs < dbH->numFiles ; seg++, processedSegs++){ if(segFile){ if(!segFile->eof()){ - //*segFile>>seg; segFile->getline(nextKey,MAXSTR); seg=getKeyPos(nextKey); } @@ -1454,6 +1460,7 @@ SILENCE_THRESH+=*pn; processedSegs++; } + ps = sNorm + segTable[i]; } if(verbosity>1) cerr << "processedSegs: " << processedSegs << endl; @@ -1589,7 +1596,7 @@ } char nextKey [MAXSTR]; - for(seg=0 ; seg < dbH->numFiles ; seg++, processedSegs++){ + for(processedSegs=0, seg=0 ; processedSegs < dbH->numFiles ; seg++, processedSegs++){ // get segID from file if using a control file if(segFile){ @@ -1600,6 +1607,7 @@ else break; } + segOffset=segOffsetTable[seg]; // numDoubles offset segIndexOffset=segOffset/dbH->dim; // numVectors offset @@ -1660,7 +1668,7 @@ } } - if(verbosity>3){ + if(verbosity>3 && usingTimes){ cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[seg] << endl; cerr.flush(); } @@ -1669,7 +1677,7 @@ (usingTimes && fabs(meanDBdur[seg]-meanQdur)<meanQdur*timesTol)){ - 
if(verbosity>3){ + if(verbosity>3 && usingTimes){ cerr << "within duration tolerance." << endl; cerr.flush(); } @@ -1684,7 +1692,7 @@ // Threshold on mean L2 of Q and S sequences (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[k]>SILENCE_THRESH && // Are both query and target windows above mean energy? - (qNorm[j]>qMeanL2*.25 && sNorm[k]>sMeanL2[seg]*.25 && diffL2 < DIFF_THRESH ))) + (qNorm[j]>qMeanL2 && sNorm[k]>sMeanL2[seg] && diffL2 < DIFF_THRESH ))) thisDist=DD[j][k]*oneOverWL; else thisDist=0.0; @@ -1714,6 +1722,10 @@ thisDist+=distances[m]; thisDist/=pointNN; + // Let's see the distances then... + if(verbosity>3) + cerr << "d[" << fileTable+seg*O2_FILETABLESIZE << "]=" << thisDist << endl; + // All the seg stuff goes here n=segNN; while(n--){
--- a/audioDB.h Tue Jul 24 12:04:15 2007 +0000 +++ b/audioDB.h Thu Aug 02 11:34:27 2007 +0000 @@ -119,8 +119,8 @@ #define O2_DEFAULT_POINTNN (10U) #define O2_DEFAULT_SEGNN (10U) -//#define O2_DEFAULTDBSIZE (2000000000) // 2GB table size -#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size +#define O2_DEFAULTDBSIZE (2000000000) // 2GB table size +//#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size //#define O2_MAXFILES (1000000) #define O2_MAXFILES (10000U) // 10,000 files @@ -208,6 +208,7 @@ unsigned isServer; unsigned port; double timesTol; + unsigned ignoreCols; // Timers struct timeval tv1;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/docs/TODO.txt Thu Aug 02 11:34:27 2007 +0000
@@ -0,0 +1,14 @@
+
+audioDB FIXME:
+
+o fix segfault when query is zero-length
+o use periodic memunmap on batch insert
+o allow keys to be passed as queries
+o rename 'segments' in help to 'files' or 'keys' ?
+o test suite
+o SOAP to serialize queryFile and keyList
+o SOAP to serialize files on insert / batch insert ?
+
+M. Casey - 24/7/7
+
+
--- a/gengetopt.in Tue Jul 24 12:04:15 2007 +0000
+++ b/gengetopt.in Thu Aug 02 11:34:27 2007 +0000
@@ -17,7 +17,7 @@
 # -F --featureList featureList.txt
 # -K --keyList keyList.txt
 # -T --timePointsList.txt
-# -B --BATCHINSERT dbName
+# -B --BATCHINSERT dbName
 #
 # -Q --QUERY {point|segment|sequence}
 # -p --qpoint - ordinal position of query vector in feature input file
@@ -47,7 +47,7 @@
 option "DUMP" D "output all entries: index key size." dependon="database" optional
 option "L2NORM" L "unit norm vectors and norm all future inserts." dependon="database" optional
 section "Database Insertion" sectiondesc="The following commands insert feature files, with optional keys and timestamps.\n"
-option "INSERT" I "add feature vectors to an existing database." dependon="features" dependon="database" optional
+option "INSERT" I "add feature vectors to an existing database." dependon="features" optional
 option "UPDATE" U "replace inserted vectors associated with key with new input vectors." dependon="features" dependon="key" dependon="database" optional hidden
 option "features" f "binary series of vectors file {int sz:ieee double[][sz]:eof}." string typestr="filename" dependon="database" optional
 option "times" t "list of time points (ascii) for feature vectors." string typestr="filename" dependon="features" optional