changeset 380:7d6dd067d12e

Support --DUMP on databases with O2_FLAG_LARGE_ADB. (This was harder than I expected; it only works given a change in the insertion policy for LARGE_ADB -- any relative pathnames must be resolved to absolute ones at insert time, otherwise all bets are off.) [ In other news, reviewing the various bits of LARGE_ADB code does not fill me with happy happy joy joy feelings: expect refactorings at the drop of a hat. ]
author mas01cr
date Fri, 21 Nov 2008 12:23:08 +0000
parents 7e6c99481b8b
children 9742ea0ac33d
files dump.cpp insert.cpp
diffstat 2 files changed, 114 insertions(+), 70 deletions(-) [+]
line wrap: on
line diff
--- a/dump.cpp	Sun Nov 16 21:18:40 2008 +0000
+++ b/dump.cpp	Fri Nov 21 12:23:08 2008 +0000
@@ -5,10 +5,6 @@
     initTables(dbName, 0);
   }
 
-  if(dbH->flags & O2_FLAG_LARGE_ADB){
-    error("error: dump not supported for LARGE_ADB");
-  }
-
   if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
     error("error making output directory", output, "mkdir");
   }
@@ -67,74 +63,96 @@
   size_t data_buffer_size;
   for(unsigned k = 0; k < dbH->numFiles; k++) {
     fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLE_ENTRY_SIZE);
-    snprintf(fName, 256, "%05d.features", k);
-    if ((ffd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
-      error("error creating feature file", fName, "open");
-    }
-    if ((write(ffd, &dbH->dim, sizeof(uint32_t))) < 0) {
-      error("error writing dimensions", fName, "write");
-    }
-
-    /* FIXME: this repeated malloc()/free() of data buffers is
-       inefficient. */
-    data_buffer_size = trackTable[k] * dbH->dim * sizeof(double);
-
-    {
-      void *tmp = malloc(data_buffer_size);
-      if (tmp == NULL) {
-	error("error allocating data buffer");
+    if(dbH->flags & O2_FLAG_LARGE_ADB) {
+      char *featureFileName = featureFileNameTable+k*O2_FILETABLE_ENTRY_SIZE;
+      fprintf(fLFile, "%s\n", featureFileName);
+      if(*featureFileName != '/') {
+	error("relative path in LARGE_ADB", featureFileName);
       }
-      data_buffer = (double *) tmp;
-    }
-
-    if ((read(dbfid, data_buffer, data_buffer_size)) != (ssize_t) data_buffer_size) {
-      error("error reading data", fName, "read");
-    }
-
-    if ((write(ffd, data_buffer, data_buffer_size)) < 0) {
-      error("error writing data", fName, "write");
-    }
-
-    free(data_buffer);
-
-    fprintf(fLFile, "%s\n", fName);
-    close(ffd);
-
-    if (times) {
-      snprintf(fName, 256, "%05d.times", k);
-      tFile = fopen(fName, "w");
-      for(unsigned i = 0; i < trackTable[k]; i++) {
-        // KLUDGE: specifying 16 digits of precision after the decimal
-        // point is (but check this!) sufficient to uniquely identify
-        // doubles; however, that will cause ugliness, as that's
-        // vastly too many for most values of interest.  Moving to %a
-        // here and scanf() in the timesFile reading might fix this.
-        // -- CSR, 2007-10-19
-        fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*i));
+      if(times) {
+	char *timesFileName = timesFileNameTable + k*O2_FILETABLE_ENTRY_SIZE;
+	fprintf(tLFile, "%s\n", timesFileName);
+	if(*timesFileName != '/') {
+	  error("relative path in LARGE_ADB", timesFileName);	  
+	}
       }
-      fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*trackTable[k]-1));
-
-      fprintf(tLFile, "%s\n", fName);
-    }
-
-    if (power) {
-      uint32_t one = 1;
-      snprintf(fName, 256, "%05d.power", k);
-      if ((pfd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
-	error("error creating power file", fName, "open");
+      if(power) {
+	char *powerFileName = powerFileNameTable + k*O2_FILETABLE_ENTRY_SIZE;
+	fprintf(pLFile, "%s\n", powerFileName);
+	if(*powerFileName != '/') {
+	  error("relative path in LARGE_ADB", powerFileName);
+	}
       }
-      if ((write(pfd, &one, sizeof(uint32_t))) < 0) {
-	error("error writing one", fName, "write");
+    } else {
+      snprintf(fName, 256, "%05d.features", k);
+      if ((ffd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
+	error("error creating feature file", fName, "open");
       }
-      if ((write(pfd, powerTable + pos, trackTable[k] * sizeof(double))) < 0) {
+      if ((write(ffd, &dbH->dim, sizeof(uint32_t))) < 0) {
+	error("error writing dimensions", fName, "write");
+      }
+      
+      /* FIXME: this repeated malloc()/free() of data buffers is
+	 inefficient. */
+      data_buffer_size = trackTable[k] * dbH->dim * sizeof(double);
+      
+      {
+	void *tmp = malloc(data_buffer_size);
+	if (tmp == NULL) {
+	  error("error allocating data buffer");
+	}
+	data_buffer = (double *) tmp;
+      }
+      
+      if ((read(dbfid, data_buffer, data_buffer_size)) != (ssize_t) data_buffer_size) {
+	error("error reading data", fName, "read");
+      }
+      
+      if ((write(ffd, data_buffer, data_buffer_size)) < 0) {
 	error("error writing data", fName, "write");
       }
-      fprintf(pLFile, "%s\n", fName);
-      close(pfd);
-    } 
-
-    pos += trackTable[k];
-    std::cout << fileTable+k*O2_FILETABLE_ENTRY_SIZE << " " << trackTable[k] << std::endl;
+      
+      free(data_buffer);
+      
+      fprintf(fLFile, "%s\n", fName);
+      close(ffd);
+      
+      if (times) {
+	snprintf(fName, 256, "%05d.times", k);
+	tFile = fopen(fName, "w");
+	for(unsigned i = 0; i < trackTable[k]; i++) {
+	  // KLUDGE: specifying 16 digits of precision after the decimal
+	  // point is (but check this!) sufficient to uniquely identify
+	  // doubles; however, that will cause ugliness, as that's
+	  // vastly too many for most values of interest.  Moving to %a
+	  // here and scanf() in the timesFile reading might fix this.
+	  // -- CSR, 2007-10-19
+	  fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*i));
+	}
+	fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*trackTable[k]-1));
+	
+	fprintf(tLFile, "%s\n", fName);
+      }
+      
+      if (power) {
+	uint32_t one = 1;
+	snprintf(fName, 256, "%05d.power", k);
+	if ((pfd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
+	  error("error creating power file", fName, "open");
+	}
+	if ((write(pfd, &one, sizeof(uint32_t))) < 0) {
+	  error("error writing one", fName, "write");
+	}
+	if ((write(pfd, powerTable + pos, trackTable[k] * sizeof(double))) < 0) {
+	  error("error writing data", fName, "write");
+	}
+	fprintf(pLFile, "%s\n", fName);
+	close(pfd);
+      } 
+      
+      pos += trackTable[k];
+      std::cout << fileTable+k*O2_FILETABLE_ENTRY_SIZE << " " << trackTable[k] << std::endl;
+    }
   }
 
   FILE *scriptFile;
--- a/insert.cpp	Sun Nov 16 21:18:40 2008 +0000
+++ b/insert.cpp	Fri Nov 21 12:23:08 2008 +0000
@@ -388,6 +388,12 @@
   if(!usingPower && (dbH->flags & O2_FLAG_POWER))
     error("Must use power with power-enabled database", dbName);
 
+  char *cwd = new char[PATH_MAX];
+
+  if ((getcwd(cwd, PATH_MAX)) == 0) {
+    error("error getting working directory", "", "getcwd");
+  }
+
   unsigned totalVectors=0;
   char *thisFile = new char[MAXSTR];
   char *thisKey = 0;
@@ -468,18 +474,38 @@
 	
 	// Primary Keys
 	INSERT_FILETABLE_STRING(fileTable, thisKey);
-	
+
+	if(*thisFile != '/') {
+	  /* FIXME: MAXSTR and O2_FILETABLE_ENTRY_SIZE should probably
+	     be the same thing.  Also, both are related to PATH_MAX,
+	     which admittedly is not always defined or a
+	     constant... */
+	  char tmp[MAXSTR];
+	  strncpy(tmp, thisFile, MAXSTR);
+	  snprintf(thisFile, MAXSTR, "%s/%s", cwd, tmp);
+	}
 	// Feature Vector fileNames
 	INSERT_FILETABLE_STRING(featureFileNameTable, thisFile);
 	
 	// Time Stamp fileNames
-	if(usingTimes)
+	if(usingTimes) {
+	  if(*thisTimesFileName != '/') {
+	    char tmp[MAXSTR];
+	    strncpy(tmp, thisTimesFileName, MAXSTR);
+	    snprintf(thisTimesFileName, MAXSTR, "%s/%s", cwd, tmp);
+	  }
 	  INSERT_FILETABLE_STRING(timesFileNameTable, thisTimesFileName);
-
+	}
 
 	// Power fileNames
-	if(usingPower)
+	if(usingPower) {
+	  if(*thisPowerFileName != '/') {
+	    char tmp[MAXSTR];
+	    strncpy(tmp, thisPowerFileName, MAXSTR);
+	    snprintf(thisPowerFileName, MAXSTR, "%s/%s", cwd, tmp);
+	  }
 	  INSERT_FILETABLE_STRING(powerFileNameTable, thisPowerFileName);
+	}
 
 	// Increment file count
 	dbH->numFiles++;