changeset 256:4dcb09f5fe85

Commit patch deprecating the --size argument, replacing it with --ntracks, --datadim and --datasize. These names are not ideal, but will serve for now.
author mas01cr
date Wed, 16 Apr 2008 09:59:43 +0000
parents fe922b9d87f8
children fc6ee42c7e55
files audioDB.cpp audioDB.h common.cpp create.cpp dump.cpp gengetopt.in insert.cpp query.cpp reporter.h
diffstat 9 files changed, 86 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB.cpp	Sat Apr 12 13:28:30 2008 +0000
+++ b/audioDB.cpp	Wed Apr 16 09:59:43 2008 +0000
@@ -136,10 +136,32 @@
   }
 
   if(args_info.size_given) {
+    if(args_info.datasize_given) {
+      error("both --size and --datasize given", "");
+    }
+    if(args_info.ntracks_given) {
+      error("both --size and --ntracks given", "");
+    }
+    if(args_info.datadim_given) {
+      error("both --size and --datadim given", "");
+    }
     if (args_info.size_arg < 50 || args_info.size_arg > 32000) {
       error("Size out of range", "");
     }
-    size = (off_t) args_info.size_arg * 1000000;
+    double ratio = (double) args_info.size_arg * 1000000 / ((double) O2_DEFAULTDBSIZE);
+    /* FIXME: what's the safe way of doing this? */
+    datasize = (unsigned int) ceil(datasize * ratio);
+    ntracks = (unsigned int) ceil(ntracks * ratio);
+  } else {
+    if(args_info.datasize_given) {
+      datasize = args_info.datasize_arg;
+    }
+    if(args_info.ntracks_given) {
+      ntracks = args_info.ntracks_arg;
+    }
+    if(args_info.datadim_given) {
+      datadim = args_info.datadim_arg;
+    }
   }
 
   if(args_info.radius_given) {
--- a/audioDB.h	Sat Apr 12 13:28:30 2008 +0000
+++ b/audioDB.h	Wed Apr 16 09:59:43 2008 +0000
@@ -57,10 +57,14 @@
 //#define O2_DEFAULTDBSIZE (4000000000) // 4GB table size
 #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size
 
+#define O2_DEFAULT_DATASIZE (1355U) // in MB
+#define O2_DEFAULT_NTRACKS (20000U)
+#define O2_DEFAULT_DATADIM (9U)
+
 #define O2_MAXFILES (20000U)
 #define O2_MAXFILESTR (256U)
-#define O2_FILETABLESIZE (O2_MAXFILESTR)
-#define O2_TRACKTABLESIZE (sizeof(unsigned))
+#define O2_FILETABLE_ENTRY_SIZE (O2_MAXFILESTR)
+#define O2_TRACKTABLE_ENTRY_SIZE (sizeof(unsigned))
 #define O2_HEADERSIZE (sizeof(dbTableHeaderT))
 #define O2_MEANNUMVECTORS (1000U)
 #define O2_MAXDIM (1000U)
@@ -170,7 +174,12 @@
 
   // Flags and parameters
   unsigned verbosity;   // how much do we want to know?
-  off_t size; // given size (for creation)
+
+  //off_t size; // given size (for creation)
+  unsigned datasize; // size in MB
+  unsigned ntracks;
+  unsigned datadim;
+
   unsigned queryType; // point queries default
   unsigned pointNN;   // how many point NNs ?
   unsigned trackNN;   // how many track NNs ?
@@ -281,7 +290,9 @@
   powerTableLength(0), \
   l2normTableLength(0), \
   verbosity(1), \
-  size(O2_DEFAULTDBSIZE), \
+  datasize(O2_DEFAULT_DATASIZE), \
+  ntracks(O2_DEFAULT_NTRACKS), \
+  datadim(O2_DEFAULT_DATADIM), \
   queryType(O2_POINT_QUERY), \
   pointNN(O2_DEFAULT_POINTNN), \
   trackNN(O2_DEFAULT_TRACKNN), \
--- a/common.cpp	Sat Apr 12 13:28:30 2008 +0000
+++ b/common.cpp	Wed Apr 16 09:59:43 2008 +0000
@@ -115,8 +115,8 @@
       powerTableLength = dbH->l2normTableOffset - dbH->powerTableOffset;
       l2normTableLength = dbH->dbSize - dbH->l2normTableOffset;
     } else {
-      fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLESIZE);
-      trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLESIZE);
+      fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLE_ENTRY_SIZE);
+      trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLE_ENTRY_SIZE);
       dataBufLength = ALIGN_PAGE_UP(dbH->length);
       timesTableLength = ALIGN_PAGE_UP(2*(dbH->length / dbH->dim));
       powerTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim);
--- a/create.cpp	Sat Apr 12 13:28:30 2008 +0000
+++ b/create.cpp	Wed Apr 16 09:59:43 2008 +0000
@@ -24,7 +24,7 @@
   dbH = new dbTableHeaderT();
   assert(dbH);
 
-  unsigned int maxfiles = (unsigned int) rint((double) O2_MAXFILES * (double) size / (double) O2_DEFAULTDBSIZE);
+  //unsigned int maxfiles = (unsigned int) rint((double) O2_MAXFILES * (double) size / (double) O2_DEFAULTDBSIZE);
 
   // Initialize header
   dbH->magic = O2_MAGIC;
@@ -35,17 +35,21 @@
   dbH->headerSize = O2_HEADERSIZE;
   dbH->length = 0;
   dbH->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE);
-  dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles);
-  dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles);
-  dbH->l2normTableOffset = ALIGN_PAGE_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double));
-  dbH->powerTableOffset = ALIGN_PAGE_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double));
-  dbH->timesTableOffset = ALIGN_PAGE_DOWN(dbH->powerTableOffset - 2*maxfiles*O2_MEANNUMVECTORS*sizeof(double));
-  dbH->dbSize = size;
+  dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
+  dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLE_ENTRY_SIZE*ntracks);
+
+  off_t databytes = ((off_t) datasize) * 1024 * 1024;
+  off_t auxbytes = databytes / datadim;
+
+  dbH->timesTableOffset = ALIGN_PAGE_UP(dbH->dataOffset + databytes);
+  dbH->powerTableOffset = ALIGN_PAGE_UP(dbH->timesTableOffset + 2*auxbytes);
+  dbH->l2normTableOffset = ALIGN_PAGE_UP(dbH->powerTableOffset + auxbytes);
+  dbH->dbSize = ALIGN_PAGE_UP(dbH->l2normTableOffset + auxbytes);
 
   write(dbfid, dbH, O2_HEADERSIZE);
 
   // go to the location corresponding to the last byte
-  if (lseek (dbfid, size - 1, SEEK_SET) == -1)
+  if (lseek (dbfid, dbH->dbSize - 1, SEEK_SET) == -1)
     error("lseek error in db file", "", "lseek");
 
   // write a dummy byte at the last location
--- a/dump.cpp	Sat Apr 12 13:28:30 2008 +0000
+++ b/dump.cpp	Wed Apr 16 09:59:43 2008 +0000
@@ -62,7 +62,7 @@
   double *data_buffer;
   size_t data_buffer_size;
   for(unsigned k = 0; k < dbH->numFiles; k++) {
-    fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLESIZE);
+    fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLE_ENTRY_SIZE);
     snprintf(fName, 256, "%05d.features", k);
     if ((ffd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
       error("error creating feature file", fName, "open");
@@ -130,7 +130,7 @@
     } 
 
     pos += trackTable[k];
-    std::cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << std::endl;
+    std::cout << fileTable+k*O2_FILETABLE_ENTRY_SIZE << " " << trackTable[k] << std::endl;
   }
 
   FILE *scriptFile;
@@ -142,7 +142,12 @@
 \n\
 if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n\
 if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\
-\"${AUDIODB}\" -d \"$1\" -N --size=%d\n", (int) (dbH->dbSize / 1000000));
+\"${AUDIODB}\" -d \"$1\" -N --datasize=%d --ntracks=%d --datadim=%d\n",
+          (int) ((dbH->timesTableOffset - dbH->dataOffset) / (1024*1024)),
+          // fileTable entries (char[256]) are bigger than trackTable
+          // (int), so the granularity of page aligning is finer.
+          (int) ((dbH->trackTableOffset - dbH->fileTableOffset) / O2_FILETABLE_ENTRY_SIZE),
+          (int) ceil(((double) (dbH->timesTableOffset - dbH->dataOffset)) / ((double) (dbH->dbSize - dbH->l2normTableOffset))));
   if(dbH->flags & O2_FLAG_L2NORM) {
     fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n");
   }
--- a/gengetopt.in	Sat Apr 12 13:28:30 2008 +0000
+++ b/gengetopt.in	Wed Apr 16 09:59:43 2008 +0000
@@ -5,11 +5,20 @@
 option "verbosity" v "level of detail of operational information." int typestr="detail" default="1" optional
 text "\nDatabase commands are UPPER CASE. Command options are lower case.\n" 
 text ""
-section "Database Setup" sectiondesc="All database operations require a database argument."
+section "Database Operations" sectiondesc="All database operations require a database argument."
 
 option "database" d "database file required by Database commands." string typestr="filename" optional
+
+section "Database Creation" sectiondesc="Creating a new database file."
+
 option "NEW"    N "make a new (initially empty) database." dependon="database" optional
-option "size"   - "size of database file (in MB)" int dependon="NEW" default="2000" optional
+option "size"   - "size of database file (in MB)" int dependon="NEW" optional hidden
+option "datasize" - "size of data table requested (in MB)" int dependon="NEW" default="1355" optional
+option "ntracks" - "capacity of database for tracks" int dependon="NEW" default="20000" optional
+option "datadim" - "dimensionality of stored data" int dependon="NEW" default="9" optional
+
+section "Database Maintenance" sectiondesc="Querying, tweaking and dumping databases."
+
 option "STATUS" S "output database information to stdout." dependon="database" optional
 option "DUMP"   D "output all entries: index key size." dependon="database" optional
 option "output" - "output directory" string dependon="DUMP" default="audioDB.dump" optional
--- a/insert.cpp	Sat Apr 12 13:28:30 2008 +0000
+++ b/insert.cpp	Wed Apr 16 09:59:43 2008 +0000
@@ -4,8 +4,8 @@
   unsigned int fmaxfiles, tmaxfiles;
   unsigned int maxfiles;
 
-  fmaxfiles = fileTableLength / O2_FILETABLESIZE;
-  tmaxfiles = trackTableLength / O2_TRACKTABLESIZE;
+  fmaxfiles = fileTableLength / O2_FILETABLE_ENTRY_SIZE;
+  tmaxfiles = trackTableLength / O2_TRACKTABLE_ENTRY_SIZE;
   maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
   return(dbH->numFiles < maxfiles);
 }
@@ -42,7 +42,7 @@
   // Linear scan of filenames check for pre-existing feature
   unsigned alreadyInserted=0;
   for(unsigned k=0; k<dbH->numFiles; k++)
-    if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key)+1)==0){
+    if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key)+1)==0){
       alreadyInserted=1;
       break;
     }
@@ -64,7 +64,7 @@
     return;
   }
 
-  strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));
+  strncpy(fileTable + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, key, strlen(key));
 
   off_t insertoffset = dbH->length;// Store current state
 
@@ -236,7 +236,7 @@
     unsigned alreadyInserted=0;
   
     for(unsigned k=0; k<dbH->numFiles; k++)
-      if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey)+1)==0){
+      if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, thisKey, strlen(thisKey)+1)==0){
 	alreadyInserted=1;
 	break;
       }
@@ -285,7 +285,7 @@
             close(thispowerfd);
           }
         }
-	strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));
+	strncpy(fileTable + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, thisKey, strlen(thisKey));
   
 	off_t insertoffset = dbH->length;// Store current state
 
--- a/query.cpp	Sat Apr 12 13:28:30 2008 +0000
+++ b/query.cpp	Wed Apr 16 09:59:43 2008 +0000
@@ -55,7 +55,7 @@
 // return ordinal position of key in keyTable
 unsigned audioDB::getKeyPos(char* key){  
   for(unsigned k=0; k<dbH->numFiles; k++)
-    if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0)
+    if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key))==0)
       return k;
   error("Key not found",key);
   return O2_ERR_KEYNOTFOUND;
--- a/reporter.h	Sat Apr 12 13:28:30 2008 +0000
+++ b/reporter.h	Wed Apr 16 09:59:43 2008 +0000
@@ -90,7 +90,7 @@
   if(adbQueryResponse==0) {
     for(rit = v.rbegin(); rit < v.rend(); rit++) {
       r = *rit;
-      std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " ";
+      std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " ";
       std::cout << r.dist << " " << r.qpos << " " << r.spos << std::endl;
     }
   } else {
@@ -109,7 +109,7 @@
       adbQueryResponse->result.Dist[k] = r.dist;
       adbQueryResponse->result.Qpos[k] = r.qpos;
       adbQueryResponse->result.Spos[k] = r.spos;
-      snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLESIZE);
+      snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE);
     }
   }
 }
@@ -191,7 +191,7 @@
   if(adbQueryResponse==0) {
     for(rit = v.rbegin(); rit < v.rend(); rit++) {
       r = *rit;
-      std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " ";
+      std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " ";
       std::cout << r.dist << " " << r.qpos << " " << r.spos << std::endl;
     }
   } else {
@@ -210,7 +210,7 @@
       adbQueryResponse->result.Dist[k] = r.dist;
       adbQueryResponse->result.Qpos[k] = r.qpos;
       adbQueryResponse->result.Spos[k] = r.spos;
-      snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLESIZE);
+      snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE);
     }
   }
 }
@@ -283,7 +283,7 @@
   if(adbQueryResponse==0) {
     for(rit = v.rbegin(); rit < v.rend(); rit++) {
       r = *rit;
-      std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " " << r.count << std::endl;
+      std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " " << r.count << std::endl;
     }
   } else {
     // FIXME
@@ -349,7 +349,7 @@
   if(adbQueryResponse==0) {
     for(rit = v.rbegin(); rit < v.rend(); rit++) {
       r = *rit;
-      std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " " << r.dist << std::endl;
+      std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " " << r.dist << std::endl;
       for(int k=0; k < (int)pointNN; k++){
 	NNresult rk = point_queues[r.trackID].top();
 	std::cout << rk.dist << " " << rk.qpos << " " << rk.spos << std::endl;
@@ -372,7 +372,7 @@
       adbQueryResponse->result.Dist[k] = r.dist;
       adbQueryResponse->result.Qpos[k] = r.qpos;
       adbQueryResponse->result.Spos[k] = r.spos;
-      snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLESIZE);
+      snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE);
     }
   }
   // clean up
@@ -469,7 +469,7 @@
   if(adbQueryResponse==0) {
     for(rit = v.rbegin(); rit < v.rend(); rit++) {
       r = *rit;
-      std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " " << r.count << std::endl;
+      std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " " << r.count << std::endl;
       int qsize=point_queues[r.trackID].size();
       for(int k=0; k < qsize; k++){
 	NNresult rk = point_queues[r.trackID].top();