changeset 187:530154ca4cf5 no-big-mmap

Wow, two changes for the price of one: * remove dataBuf usage from trackPointQuery(); * make the restrict-list tests pass: ** lseek() to the right place if we're actually not doing tracks in sequential order; ** deal with the off-by-one error in reading in lines from trackFile.
author mas01cr
date Fri, 16 Nov 2007 16:31:36 +0000
parents d5ae11d6cd2c
children 0caa733d48c5
files audioDB.cpp
diffstat 1 files changed, 70 insertions(+), 29 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB.cpp	Fri Nov 16 16:27:28 2007 +0000
+++ b/audioDB.cpp	Fri Nov 16 16:31:36 2007 +0000
@@ -599,7 +599,11 @@
   // Check times status and insert times from file
   unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
   double* timesdata=timesTable+timesoffset;
-  assert(timesdata+numVectors<l2normTable);
+  /* FIXME: work out how to check against wandering off the end of the
+     times table.
+
+     assert(timesdata+numVectors<l2normTable);
+  */
   insertTimeStamps(numVectors, timesFile, timesdata);
 
   // Increment file count
@@ -1346,9 +1350,9 @@
     }
   
   // build track offset table
-  unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
+  off_t *trackOffsetTable = new off_t[dbH->numFiles];
   unsigned cumTrack=0;
-  unsigned trackIndexOffset;
+  off_t trackIndexOffset;
   for(k=0; k<dbH->numFiles;k++){
     trackOffsetTable[k]=cumTrack;
     cumTrack+=trackTable[k]*dbH->dim;
@@ -1357,18 +1361,29 @@
   char nextKey[MAXSTR];
 
   gettimeofday(&tv1, NULL); 
+
+  size_t data_buffer_size = 0;
+  double *data_buffer = 0;
+  lseek(dbfid, dbH->dataOffset, SEEK_SET);
         
   for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
-    if(trackFile){
-      if(!trackFile->eof()){
-	trackFile->getline(nextKey,MAXSTR);
-	track=getKeyPos(nextKey);
+
+    trackOffset = trackOffsetTable[track];     // numDoubles offset
+
+    // get trackID from file if using a control file
+    if(trackFile) {
+      trackFile->getline(nextKey,MAXSTR);
+      if(!trackFile->eof()) {
+	track = getKeyPos(nextKey);
+        trackOffset = trackOffsetTable[track];
+        lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
+      } else {
+	break;
       }
-      else
-	break;
     }
-    trackOffset=trackOffsetTable[track];     // numDoubles offset
+
     trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
+
     if(verbosity>7) {
       cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush();
     }
@@ -1381,9 +1396,26 @@
       j=1;
     else
       j=numVectors;
+
+    if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
+      if(data_buffer) {
+        free(data_buffer);
+      }
+      { 
+        data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
+        void *tmp = malloc(data_buffer_size);
+        if (tmp == NULL) {
+          error("error allocating data buffer");
+        }
+        data_buffer = (double *) tmp;
+      }
+    }
+
+    read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
+
     while(j--){
       k=trackTable[track];  // number of vectors in track
-      data=dataBuf+trackOffset; // data for track
+      data=data_buffer; // data for track
       while(k--){
 	thisDist=0;
 	l=dbH->dim;
@@ -1453,6 +1485,9 @@
       sIndexes[k]=~0;    
     }
   } // tracks
+
+  free(data_buffer);
+
   gettimeofday(&tv2, NULL); 
 
   if(verbosity>1) {
@@ -1738,9 +1773,9 @@
   double* dp;
 
   // build track offset table
-  unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
+  off_t *trackOffsetTable = new off_t[dbH->numFiles];
   unsigned cumTrack=0;
-  unsigned trackIndexOffset;
+  off_t trackIndexOffset;
   for(k=0; k<dbH->numFiles;k++){
     trackOffsetTable[k]=cumTrack;
     cumTrack+=trackTable[k]*dbH->dim;
@@ -1762,17 +1797,20 @@
 
   for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) {
 
+    trackOffset = trackOffsetTable[track];     // numDoubles offset
+
     // get trackID from file if using a control file
-    if(trackFile){
-      if(!trackFile->eof()){
-	trackFile->getline(nextKey,MAXSTR);
-	track=getKeyPos(nextKey);
+    if(trackFile) {
+      trackFile->getline(nextKey,MAXSTR);
+      if(!trackFile->eof()) {
+	track = getKeyPos(nextKey);
+        trackOffset = trackOffsetTable[track];
+        lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
+      } else {
+	break;
       }
-      else
-	break;
     }
 
-    trackOffset=trackOffsetTable[track];     // numDoubles offset
     trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
 
     if(sequenceLength<=trackTable[track]){  // test for short sequences
@@ -2264,9 +2302,9 @@
   double* dp;
 
   // build track offset table
-  unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
+  off_t *trackOffsetTable = new off_t[dbH->numFiles];
   unsigned cumTrack=0;
-  unsigned trackIndexOffset;
+  off_t trackIndexOffset;
   for(k=0; k<dbH->numFiles;k++){
     trackOffsetTable[k]=cumTrack;
     cumTrack+=trackTable[k]*dbH->dim;
@@ -2288,17 +2326,20 @@
 
   for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
 
+    trackOffset = trackOffsetTable[track];     // numDoubles offset
+
     // get trackID from file if using a control file
-    if(trackFile){
-      if(!trackFile->eof()){
-	trackFile->getline(nextKey,MAXSTR);
-	track=getKeyPos(nextKey);
+    if(trackFile) {
+      trackFile->getline(nextKey,MAXSTR);
+      if(!trackFile->eof()) {
+	track = getKeyPos(nextKey);
+        trackOffset = trackOffsetTable[track];
+        lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
+      } else {
+	break;
       }
-      else
-	break;
     }
 
-    trackOffset=trackOffsetTable[track];     // numDoubles offset
     trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
 
     if(sequenceLength<=trackTable[track]){  // test for short sequences