changeset 192:5f3d260ba71d no-big-mmap

Create and status of huge databases now work. Dump also works, but has not yet been tested beyond the 4GB limit.
author mas01cr
date Mon, 19 Nov 2007 18:37:12 +0000
parents b7400fabbb94
children
files audioDB.cpp audioDB.h
diffstat 2 files changed, 16 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB.cpp	Mon Nov 19 17:19:46 2007 +0000
+++ b/audioDB.cpp	Mon Nov 19 18:37:12 2007 +0000
@@ -164,10 +164,10 @@
   }
 
   if(args_info.size_given) {
-    if (args_info.size_arg < 50 || args_info.size_arg > 4000) {
+    if (args_info.size_arg < 50 || args_info.size_arg > 32000) {
       error("Size out of range", "");
     }
-    size = args_info.size_arg * 1000000;
+    size = (off_t) args_info.size_arg * 1000000;
   }
 
   if(args_info.radius_given){
@@ -603,7 +603,7 @@
 
   strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));
 
-  unsigned insertoffset = dbH->length;// Store current state
+  off_t insertoffset = dbH->length;// Store current state
 
   // Check times status and insert times from file
   unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
@@ -771,7 +771,7 @@
 	  thisTimesFile=new ifstream(thisTimesFileName,ios::in);
 	  if(!thisTimesFile->is_open())
 	    error("Cannot open timestamp file",thisTimesFileName);
-	  unsigned insertoffset=dbH->length;
+	  off_t insertoffset=dbH->length;
 	  unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
 	  double* timesdata=timesTable+timesoffset;
           if(timesoffset + numVectors > timesTableLength) {
@@ -784,7 +784,7 @@
 	  
 	strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));
   
-	unsigned insertoffset = dbH->length;// Store current state
+	off_t insertoffset = dbH->length;// Store current state
 
 	// Increment file count
 	dbH->numFiles++;  
@@ -1020,7 +1020,7 @@
 \n\
 if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n\
 if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\
-\"${AUDIODB}\" -d \"$1\" -N --size=%d\n", dbH->dbSize / 1000000);
+\"${AUDIODB}\" -d \"$1\" -N --size=%d\n", (int) (dbH->dbSize / 1000000));
   if(dbH->flags & O2_FLAG_L2NORM) {
     fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n");
   }
@@ -1049,6 +1049,7 @@
   forWrite = true;
   initTables(dbName, 0);
   if(dbH->length>0){
+    /* FIXME: should probably be uint64_t */
     unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
     CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
     unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append
--- a/audioDB.h	Mon Nov 19 17:19:46 2007 +0000
+++ b/audioDB.h	Mon Nov 19 18:37:12 2007 +0000
@@ -103,13 +103,13 @@
   // scale to really large collections easily but it works around the
   // problem.  Expanding to 64 bits will of course need a change in
   // file format version.  -- CSR, 2007-10-05
-  uint32_t length;
-  uint32_t fileTableOffset;
-  uint32_t trackTableOffset;
-  uint32_t dataOffset;
-  uint32_t l2normTableOffset;
-  uint32_t timesTableOffset;
-  uint32_t dbSize;
+  off_t length;
+  off_t fileTableOffset;
+  off_t trackTableOffset;
+  off_t dataOffset;
+  off_t l2normTableOffset;
+  off_t timesTableOffset;
+  off_t dbSize;
 } dbTableHeaderT, *dbTableHeaderPtr;
 
 
@@ -148,13 +148,13 @@
 
   size_t fileTableLength;
   size_t trackTableLength;
-  size_t dataBufLength;
+  off_t dataBufLength;
   size_t timesTableLength;
   size_t l2normTableLength;
 
   // Flags and parameters
   unsigned verbosity;   // how much do we want to know?
-  unsigned size; // given size (for creation)
+  off_t size; // given size (for creation)
   unsigned queryType; // point queries default
   unsigned pointNN;   // how many point NNs ?
   unsigned trackNN;   // how many track NNs ?