changeset 15:69eb22e09772 audiodb-debian

Merged trunk changes -r14:29 onto audiodb-debian branch
author mas01cr
date Thu, 02 Aug 2007 11:34:27 +0000
parents c533e9e67374
children 4346f4b717e4
files TODO.txt audioDB.cpp audioDB.h docs/TODO.txt gengetopt.in
diffstat 5 files changed, 54 insertions(+), 36 deletions(-) [+]
line wrap: on
line diff
--- a/TODO.txt	Tue Jul 24 12:04:15 2007 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-
-audioDB FIXME:
-
-command line processing: need better syntax for commands
-interactive shell processing: make a query shell
-
-zero-length input: do something sensible with null files
-
-
--- a/audioDB.cpp	Tue Jul 24 12:04:15 2007 +0000
+++ b/audioDB.cpp	Thu Aug 02 11:34:27 2007 +0000
@@ -282,7 +282,7 @@
    }
    return 0;
  }
-
+ 
  if(args_info.BATCHINSERT_given){
    command=COM_BATCHINSERT;
    dbName=args_info.database_arg;
@@ -416,8 +416,8 @@
 */
 
 void audioDB::create(const char* dbName){
-  if ((dbfid = open (dbName, O_RDWR | O_CREAT | O_TRUNC)) < 0)
-    error("Can't open database file:", dbName);
+  if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0)
+    error("Can't open database file", dbName);
 
   // go to the location corresponding to the last byte
   if (lseek (dbfid, O2_DEFAULTDBSIZE - 1, SEEK_SET) == -1)
@@ -464,7 +464,7 @@
   
   // open the input file
   if (inFile && (infid = open (inFile, O_RDONLY)) < 0)
-    error("can't open feature file for reading", inFile);
+    error("can't open input file for reading", inFile);
 
   // find size of input file
   if (inFile && fstat (infid,&statbuf) < 0)
@@ -690,18 +690,6 @@
   }
 
   
-  // mmap the database file
-  if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
-			 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
-    error("mmap error for creating database");
-
-  // Make some handy tables with correct types
-  fileTable= (char*)(db+fileTableOffset);
-  segTable = (unsigned*)(db+segTableOffset);
-  dataBuf  = (double*)(db+dataoffset);
-  l2normTable = (double*)(db+l2normTableOffset);
-  timesTable = (double*)(db+timesTableOffset);
-
   unsigned totalVectors=0;
   char *thisKey = new char[MAXSTR];
   char *thisFile = new char[MAXSTR];
@@ -727,6 +715,18 @@
     if (thisFile && fstat (infid,&statbuf) < 0)
       error("fstat error finding size of input");
 
+    // mmap the database file
+    if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
+			   MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
+      error("mmap error for creating database");
+    
+    // Make some handy tables with correct types
+    fileTable= (char*)(db+fileTableOffset);
+    segTable = (unsigned*)(db+segTableOffset);
+    dataBuf  = (double*)(db+dataoffset);
+    l2normTable = (double*)(db+l2normTableOffset);
+    timesTable = (double*)(db+timesTableOffset);
+
     // Check that there is room for at least 1 more file
     if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int))))
       error("No more room in database","insert failed: reason database is full.");
@@ -815,7 +815,13 @@
     // CLEAN UP
     munmap(indata,statbuf.st_size);
     close(infid);
+    munmap(db,O2_DEFAULTDBSIZE);
   }while(!filesIn->eof());
+
+  // mmap the database file
+  if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
+			 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
+    error("mmap error for creating database");
   
   if(verbosity)
     cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " 
@@ -823,7 +829,8 @@
   
   // Report status
   status(dbName);
-
+  
+  munmap(db,O2_DEFAULTDBSIZE);
 }
 
 void audioDB::ws_status(const char*dbName, char* hostport){
@@ -1227,10 +1234,9 @@
 
   gettimeofday(&tv1, NULL); 
         
-  for(seg=0 ; seg < dbH->numFiles ; seg++, processedSegs++){
+  for(processedSegs=0, seg=0 ; processedSegs < dbH->numFiles ; seg++, processedSegs++){
     if(segFile){
       if(!segFile->eof()){
-	//*segFile>>seg;
 	segFile->getline(nextKey,MAXSTR);
 	seg=getKeyPos(nextKey);
       }
@@ -1454,6 +1460,7 @@
       SILENCE_THRESH+=*pn;
       processedSegs++;
     }
+    ps = sNorm + segTable[i];
   }
   if(verbosity>1)
     cerr << "processedSegs: " << processedSegs << endl;
@@ -1589,7 +1596,7 @@
   }
 
   char nextKey [MAXSTR];
-  for(seg=0 ; seg < dbH->numFiles ; seg++, processedSegs++){
+  for(processedSegs=0, seg=0 ; processedSegs < dbH->numFiles ; seg++, processedSegs++){
 
     // get segID from file if using a control file
     if(segFile){
@@ -1600,6 +1607,7 @@
       else
 	break;
     }
+
     segOffset=segOffsetTable[seg];     // numDoubles offset
     segIndexOffset=segOffset/dbH->dim; // numVectors offset
 
@@ -1660,7 +1668,7 @@
 	  }
       }
       
-      if(verbosity>3){
+      if(verbosity>3 && usingTimes){
 	cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[seg] << endl;
 	cerr.flush();
       }
@@ -1669,7 +1677,7 @@
 	 (usingTimes 
 	  && fabs(meanDBdur[seg]-meanQdur)<meanQdur*timesTol)){
 
-	if(verbosity>3){
+	if(verbosity>3 && usingTimes){
 	  cerr << "within duration tolerance." << endl;
 	  cerr.flush();
 	}
@@ -1684,7 +1692,7 @@
 	       // Threshold on mean L2 of Q and S sequences
 	       (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[k]>SILENCE_THRESH && 
 		// Are both query and target windows above mean energy?
-		(qNorm[j]>qMeanL2*.25 && sNorm[k]>sMeanL2[seg]*.25 &&  diffL2 < DIFF_THRESH )))
+		(qNorm[j]>qMeanL2 && sNorm[k]>sMeanL2[seg] &&  diffL2 < DIFF_THRESH )))
 	      thisDist=DD[j][k]*oneOverWL;
 	    else
 	      thisDist=0.0;
@@ -1714,6 +1722,10 @@
 	  thisDist+=distances[m];
 	thisDist/=pointNN;
 	
+	// Let's see the distances then...
+	if(verbosity>3)
+	  cerr << "d[" << fileTable+seg*O2_FILETABLESIZE << "]=" << thisDist << endl;
+
 	// All the seg stuff goes here
 	n=segNN;
 	while(n--){
--- a/audioDB.h	Tue Jul 24 12:04:15 2007 +0000
+++ b/audioDB.h	Thu Aug 02 11:34:27 2007 +0000
@@ -119,8 +119,8 @@
 #define O2_DEFAULT_POINTNN (10U)
 #define O2_DEFAULT_SEGNN  (10U)
 
-//#define O2_DEFAULTDBSIZE (2000000000) // 2GB table size
-#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size
+#define O2_DEFAULTDBSIZE (2000000000) // 2GB table size
+//#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size
 
 //#define O2_MAXFILES (1000000)
 #define O2_MAXFILES (10000U)           // 10,000 files
@@ -208,6 +208,7 @@
   unsigned isServer;
   unsigned port;
   double timesTol;
+  unsigned ignoreCols;
 
   // Timers
   struct timeval tv1;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/docs/TODO.txt	Thu Aug 02 11:34:27 2007 +0000
@@ -0,0 +1,14 @@
+
+audioDB FIXME:
+
+o fix segfault when query is zero-length
+o use periodic munmap on batch insert
+o allow keys to be passed as queries
+o rename 'segments' in help to 'files' or 'keys' ?
+o test suite
+o SOAP to serialize queryFile and keyList
+o SOAP to serialize files on insert / batch insert ?
+
+M. Casey - 24/7/7
+
+
--- a/gengetopt.in	Tue Jul 24 12:04:15 2007 +0000
+++ b/gengetopt.in	Thu Aug 02 11:34:27 2007 +0000
@@ -17,7 +17,7 @@
 #     -F --featureList featureList.txt 
 #     -K --keyList keyList.txt
 #     -T --timePointsList.txt
-#     -B --BATCHINSERT dbName 
+#     -B --BATCHINSERT dbName
 #
 #     -Q --QUERY {point|segment|sequence} 
 #     -p --qpoint - ordinal position of query vector in feature input file
@@ -47,7 +47,7 @@
 option "DUMP"   D "output all entries: index key size." dependon="database" optional
 option "L2NORM" L "unit norm vectors and norm all future inserts." dependon="database" optional
 section "Database Insertion" sectiondesc="The following commands insert feature files, with optional keys and timestamps.\n"
-option "INSERT"      I "add feature vectors to an existing database." dependon="features" dependon="database" optional
+option "INSERT"      I "add feature vectors to an existing database." dependon="features" optional
 option "UPDATE"      U "replace inserted vectors associated with key with new input vectors." dependon="features" dependon="key" dependon="database" optional hidden
 option "features" f "binary series of vectors file {int sz:ieee double[][sz]:eof}." string typestr="filename" dependon="database" optional
 option "times"    t "list of time points (ascii) for feature vectors." string typestr="filename" dependon="features" optional