Mercurial > hg > audiodb

--- a/audioDB.cpp	Tue Dec 09 20:53:39 2008 +0000
+++ b/audioDB.cpp	Tue Dec 09 22:48:30 2008 +0000
@@ -733,6 +733,108 @@
   status(dbName);
 }

+// Insert the single feature file inFile into the database dbName, passing
+// the timesFileName, powerFileName and key members along with it.
+void audioDB::insert(const char* dbName, const char* inFile) {
+  if(!adb && !(adb = audiodb_open(dbName, O_RDWR))) {
+    error("failed to open database", dbName);
+  }
+
+  /* powerfd (an fd) and timesFile (a std::ifstream *) are also around at
+   * this point; ignore them and hand over the file names only. */
+  adb_insert_t request;
+  request.key = key;
+  request.power = powerFileName;
+  request.times = timesFileName;
+  request.features = inFile;
+
+  const int failed = audiodb_insert(adb, &request);
+  if(failed) {
+    error("insertion failure", inFile);
+  }
+  status(dbName);
+}
+
+// Insert every feature file listed in inFile (one path per line) into the
+// database dbName.  Keys come from the list file named by the `key` member
+// when it differs from inFile; otherwise each feature path is its own key.
+// With usingTimes/usingPower set, one line per feature file is also read
+// from timesFile/powerFile.  Failures are reported through error().
+void audioDB::batchinsert(const char* dbName, const char* inFile) {
+  if(!adb) {
+    if(!(adb = audiodb_open(dbName, O_RDWR))) {
+      error("failed to open database", dbName);
+    }
+  }
+
+  if(!key)
+    key=inFile;
+  const bool separateKeys = key && (key != inFile);
+
+  // operator new never returns null, so a failed open must be detected on
+  // the stream itself.
+  std::ifstream filesIn(inFile);
+  if(!filesIn.is_open())
+    error("Could not open batch in file", inFile);
+  std::ifstream keysIn;
+  if(separateKeys) {
+    keysIn.open(key);
+    if(!keysIn.is_open())
+      error("Could not open batch key file",key);
+  }
+
+  unsigned totalVectors=0;
+  char *thisFile = new char[MAXSTR];
+  char *keyBuffer = separateKeys ? new char[MAXSTR] : 0;
+  char *thisTimesFileName = new char[MAXSTR];
+  char *thisPowerFileName = new char[MAXSTR];
+
+  for(;;) {
+    filesIn.getline(thisFile,MAXSTR);
+    // Stop only once nothing more could be read.  eof() alone is also set
+    // after successfully reading a final line that lacks a trailing
+    // newline, and that entry must still be inserted.
+    if(filesIn.eof() && filesIn.fail()) {
+      break;
+    }
+    const char *thisKey = thisFile;
+    if(separateKeys) {
+      keysIn.getline(keyBuffer,MAXSTR);
+      thisKey = keyBuffer;
+    }
+    // A list has run short only if the read itself failed.
+    if(usingTimes && !timesFile->getline(thisTimesFileName,MAXSTR)) {
+      error("not enough timestamp files in timesList", timesFileName);
+    }
+    if(usingPower && !powerFile->getline(thisPowerFileName, MAXSTR)) {
+      error("not enough power files in powerList", powerFileName);
+    }
+
+    adb_insert_t insert;
+    insert.features = thisFile;
+    insert.times = usingTimes ? thisTimesFileName : NULL;
+    insert.power = usingPower ? thisPowerFileName : NULL;
+    insert.key = thisKey;
+    if(audiodb_insert(adb, &insert)) {
+      error("insertion failure", thisFile);
+    }
+  }
+
+  // NOTE(review): totalVectors is never updated here, so this summary
+  // always reports zero vectors and zero bytes.
+  // NOTE(review): dbH is not set up in this function -- confirm it is valid
+  // before it is dereferenced below.
+  VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
+
+  delete [] thisPowerFileName;
+  delete [] keyBuffer;
+  delete [] thisTimesFileName;
+  delete [] thisFile;
+
+  // Report status
+  status(dbName);
+}
+
 // This entry point is visited once per instance
 // so it is a good place to set any global state variables
 int main(const int argc, const char* argv[]){
--- a/audioDB.h	Tue Dec 09 20:53:39 2008 +0000
+++ b/audioDB.h	Tue Dec 09 22:48:30 2008 +0000
@@ -355,10 +355,8 @@
   void get_lock(int fd, bool exclusive);
   void release_lock(int fd);
   void create(const char* dbName);
-  bool enough_per_file_space_free();
   void insert(const char* dbName, const char* inFile);
   void batchinsert(const char* dbName, const char* inFile);
-  void batchinsert_large_adb(const char* dbName, const char* inFile);
   void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
   void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
--- a/insert.cpp	Tue Dec 09 20:53:39 2008 +0000
+++ b/insert.cpp	Tue Dec 09 22:48:30 2008 +0000
@@ -6,11 +6,15 @@

 static bool audiodb_enough_data_space_free(adb_t *adb, off_t size) {
   adb_header_t *header = adb->header;
-  /* FIXME: timesTableOffset isn't necessarily the next biggest offset
-     after dataOffset.  Maybe make the offsets into an array that we
-     can iterate over... */
-  return (header->timesTableOffset >
-          header->dataOffset + header->length + size);
+  if(header->flags & O2_FLAG_LARGE_ADB) {
+    return true;
+  } else {
+    /* FIXME: timesTableOffset isn't necessarily the next biggest
+     * offset after dataOffset.  Maybe make the offsets into an array
+     * that we can iterate over... */
+    return (header->timesTableOffset >
+            (header->dataOffset + header->length + size));
+  }
 }

 static bool audiodb_enough_per_file_space_free(adb_t *adb) {
@@ -22,8 +26,24 @@
   int fmaxfiles = file_table_length / O2_FILETABLE_ENTRY_SIZE;
   int tmaxfiles = track_table_length / O2_TRACKTABLE_ENTRY_SIZE;
   /* maxfiles is the _minimum_ of the two.  Do not be confused... */
-  unsigned int maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
-  return (header->numFiles < maxfiles);
+  int maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
+  if(header->flags & O2_FLAG_LARGE_ADB) {
+    /* by default, these tables are created with the same size as the
+     * fileTable (which should be called key_table); relying on that
+     * always being the case, though, smacks of optimism, so instead
+     * we code defensively... */
+    off_t data_table_length = header->timesTableOffset - header->dataOffset;
+    off_t times_table_length = header->powerTableOffset - header->timesTableOffset;
+    off_t power_table_length = header->dbSize - header->powerTableOffset;
+    int dmaxfiles = data_table_length / O2_FILETABLE_ENTRY_SIZE;
+    int timaxfiles = times_table_length / O2_FILETABLE_ENTRY_SIZE;
+    int pmaxfiles = power_table_length / O2_FILETABLE_ENTRY_SIZE;
+    /* ... even though it means a certain amount of tedium. */
+    maxfiles = maxfiles > dmaxfiles ? dmaxfiles : maxfiles;
+    maxfiles = maxfiles > timaxfiles ? timaxfiles : maxfiles;
+    maxfiles = maxfiles > pmaxfiles ? pmaxfiles : maxfiles;
+  }
+  return (header->numFiles < (unsigned int) maxfiles);
 }

 /*
@@ -33,28 +53,28 @@
  * datum into the database, we:
  *
  *  1. check write permission;
- *  2. check !O2_FLAG_LARGE_ADB;
- *  3. check for enough space;
- *  4. check that datum->dim and adb->header->dim agree (or that the
+ *  2. check for enough space;
+ *  3. check that datum->dim and adb->header->dim agree (or that the
  *     header dimension is zero, in which case write datum->dim to
  *     adb->header->dim).
- *  5. check for presence of datum->key in adb->keys;
- *  6. check for consistency between power and O2_FLAG_POWER, and
+ *  4. check for presence of datum->key in adb->keys;
+ *  5. check for consistency between power and O2_FLAG_POWER, and
  *     times and O2_FLAG_TIMES;
- *  7. write in data, power, times as appropriate; add to track
+ *  6. write in data, power, times as appropriate; add to track
  *     and key tables too;
- *  8. if O2_FLAG_L2NORM, compute norms and fill in table;
- *  9. update adb->keys and adb->header;
- * 10. sync adb->header with disk.
+ *  7. if O2_FLAG_L2NORM and !O2_FLAG_LARGE_ADB, compute norms and fill
+ *     in table;
+ *  8. update adb->keys and adb->header;
+ *  9. sync adb->header with disk.
  *
- * Step 10 essentially commits the transaction; until we update
+ * Step 9 essentially commits the transaction; until we update
  * header->length, nothing will recognize the newly-written data.  In
  * principle, if it fails, we should roll back, which we can in fact
- * do on the assumption that nothing in step 9 can ever fail; on the
+ * do on the assumption that nothing in step 8 can ever fail; on the
  * other hand, if it's failed, then it's unlikely that rolling back by
  * syncing the original header back to disk is going to work
  * desperately well.  We should perhaps take an operating-system lock
- * around step 10, so that we can't be interrupted part-way through
+ * around step 9, so that we can't be interrupted part-way through
  * (except of course for SIGKILL, but if we're hit with that we will
  * always lose).
  */
@@ -67,11 +87,7 @@
   if(!(adb->flags & O_RDWR)) {
     return 1;
   }
-  /* 2. check !O2_FLAG_LARGE_ADB; */
-  if(adb->header->flags & O2_FLAG_LARGE_ADB) {
-    return 1;
-  }
-  /* 3. check for enough space; */
+  /* 2. check for enough space; */
   size = sizeof(double) * datum->nvectors * datum->dim;
   if(!audiodb_enough_data_space_free(adb, size)) {
     return 1;
@@ -79,7 +95,7 @@
   if(!audiodb_enough_per_file_space_free(adb)) {
     return 1;
   }
-  /* 4. check that datum->dim and adb->header->dim agree (or that the
+  /* 3. check that datum->dim and adb->header->dim agree (or that the
    *    header dimension is zero, in which case write datum->dim to
    *    adb->header->dim).
    */
@@ -88,14 +104,14 @@
   } else if (adb->header->dim != datum->dim) {
     return 1;
   }
-  /* 5. check for presence of datum->key in adb->keys; */
+  /* 4. check for presence of datum->key in adb->keys; */
   if(adb->keys->count(datum->key)) {
     /* not part of an explicit API/ABI, but we need a distinguished
        value in this circumstance to preserve somewhat wonky behaviour
        of audioDB::batchinsert. */
     return 2;
   }
-  /* 6. check for consistency between power and O2_FLAG_POWER, and
+  /* 5. check for consistency between power and O2_FLAG_POWER, and
    *    times and O2_FLAG_TIMES;
    */
   if((datum->power && !(adb->header->flags & O2_FLAG_POWER)) ||
@@ -111,30 +127,62 @@
   } else if ((adb->header->flags & O2_FLAG_TIMES) && !datum->times) {
     return 1;
   }
-  /* 7. write in data, power, times as appropriate; add to track
+  /* 6. write in data, power, times as appropriate; add to track
    *    and key tables too;
    */
   offset = adb->header->length;
   nfiles = adb->header->numFiles;

   /* FIXME: checking for all these lseek()s and write()s */
-  lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET);
-  write(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim);
-  if(datum->power) {
-    lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET);
-    write(adb->fd, datum->power, sizeof(double) * datum->nvectors);
-  }
-  if(datum->times) {
-    lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET);
-    write(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2);
-  }
+  lseek(adb->fd, adb->header->fileTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
+  write(adb->fd, datum->key, strlen(datum->key)+1);
   lseek(adb->fd, adb->header->trackTableOffset + nfiles * O2_TRACKTABLE_ENTRY_SIZE, SEEK_SET);
   write(adb->fd, &datum->nvectors, O2_TRACKTABLE_ENTRY_SIZE);
-  lseek(adb->fd, adb->header->fileTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
-  write(adb->fd, datum->key, strlen(datum->key)+1);
+  if(adb->header->flags & O2_FLAG_LARGE_ADB) {
+    char cwd[PATH_MAX];
+    char slash = '/';

-  /* 8. if O2_FLAG_L2NORM, compute norms and fill in table; */
-  if(adb->header->flags & O2_FLAG_L2NORM) {
+    getcwd(cwd, PATH_MAX);
+    lseek(adb->fd, adb->header->dataOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
+    if(*((char *) datum->data) != '/') {
+      write(adb->fd, cwd, strlen(cwd));
+      write(adb->fd, &slash, 1);
+    }
+    write(adb->fd, datum->data, strlen((const char *) datum->data)+1);
+    if(datum->power) {
+      lseek(adb->fd, adb->header->powerTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
+      if(*((char *) datum->power) != '/') {
+        write(adb->fd, cwd, strlen(cwd));
+        write(adb->fd, &slash, 1);
+      }
+      write(adb->fd, datum->power, strlen((const char *) datum->power)+1);
+    }
+    if(datum->times) {
+      lseek(adb->fd, adb->header->timesTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
+      if(*((char *) datum->times) != '/') {
+        write(adb->fd, cwd, strlen(cwd));
+        write(adb->fd, &slash, 1);
+      }
+      write(adb->fd, datum->times, strlen((const char *) datum->times)+1);
+    }
+  } else {
+    lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET);
+    write(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim);
+    if(datum->power) {
+      lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET);
+      write(adb->fd, datum->power, sizeof(double) * datum->nvectors);
+    }
+    if(datum->times) {
+      lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET);
+      write(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2);
+    }
+  }
+
+  /* 7. if O2_FLAG_L2NORM and !O2_FLAG_LARGE_ADB, compute norms and fill
+   *    in table;
+   */
+  if((adb->header->flags & O2_FLAG_L2NORM) &&
+     !(adb->header->flags & O2_FLAG_LARGE_ADB)) {
     l2norm_buffer = (double *) malloc(datum->nvectors * sizeof(double));

     /* FIXME: shared code with audiodb_norm_existing() */
@@ -153,10 +201,12 @@
     free(l2norm_buffer);
   }

+  /* 8. update adb->keys and adb->header; */
   adb->keys->insert(datum->key);
   adb->header->numFiles += 1;
   adb->header->length += sizeof(double) * datum->nvectors * datum->dim;

+  /* 9. sync adb->header with disk. */
   return audiodb_sync_header(adb);

  error:
@@ -174,16 +224,6 @@
   return audiodb_insert_datum_internal(adb, &d);
 }

-bool audioDB::enough_per_file_space_free() {
-  unsigned int fmaxfiles, tmaxfiles;
-  unsigned int maxfiles;
-
-  fmaxfiles = fileTableLength / O2_FILETABLE_ENTRY_SIZE;
-  tmaxfiles = trackTableLength / O2_TRACKTABLE_ENTRY_SIZE;
-  maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
-  return(dbH->numFiles < maxfiles);
-}
-
 static int audiodb_free_datum(adb_datum_t *datum) {
   if(datum->data) {
     free(datum->data);
@@ -287,7 +327,43 @@

 int audiodb_insert(adb_t *adb, adb_insert_t *insert) {
   if(adb->header->flags & O2_FLAG_LARGE_ADB) {
-    return 1;
+    adb_datum_internal_t d;
+    struct stat st;
+    int fd;
+    int err;
+    off_t size;
+
+    if((fd = open(insert->features, O_RDONLY)) == -1) {
+      return 1;
+    }
+    if(fstat(fd, &st)) {
+      return 1;
+    }
+    read(fd, &(d.dim), sizeof(uint32_t));
+    close(fd);
+    size = st.st_size - sizeof(uint32_t);
+    d.nvectors = size / (sizeof(double) * d.dim);
+    d.data = (void *) insert->features;
+    if(insert->power) {
+      if(stat(insert->power, &st)) {
+        return 1;
+      }
+    }
+    d.power = (void *) insert->power;
+    if(insert->times) {
+      if(stat(insert->times, &st)) {
+        return 1;
+      }
+    }
+    d.times = (void *) insert->times;
+    d.key = insert->key ? insert->key : insert->features;
+    err = audiodb_insert_datum_internal(adb, &d);
+
+    if(err == 2) {
+      return 0;
+    } else {
+      return err;
+    }
   } else {
     adb_datum_t datum;
     int err;
@@ -300,8 +376,7 @@

     if(err == 2) {
       return 0;
-    }
-    else {
+    } else {
       return err;
     }
   }
@@ -316,302 +391,3 @@
   }
   return 0;
 }
-
-void audioDB::insert(const char* dbName, const char* inFile) {
-  if(!adb) {
-    if(!(adb = audiodb_open(dbName, O_RDWR))) {
-      error("failed to open database", dbName);
-    }
-  }
-
-  /* at this point, we have powerfd (an fd), timesFile (a
-   * std::ifstream *) and inFile (a char *).  Wacky, huh?  Ignore
-   * the wackiness and just use the names. */
-  adb_insert_t insert;
-  insert.features = inFile;
-  insert.times = timesFileName;
-  insert.power = powerFileName;
-  insert.key = key;
-
-  if(audiodb_insert(adb, &insert)) {
-    error("insertion failure", inFile);
-  }
-  status(dbName);
-}
-
-void audioDB::batchinsert(const char* dbName, const char* inFile) {
-  forWrite = true;
-  initDBHeader(dbName);
-
-  // Treat large ADB instances differently
-  if( dbH->flags & O2_FLAG_LARGE_ADB ){
-    batchinsert_large_adb(dbName, inFile) ;
-    return;
-  }
-
-  if(!key)
-    key=inFile;
-  std::ifstream *filesIn = 0;
-  std::ifstream *keysIn = 0;
-
-  if(!(filesIn = new std::ifstream(inFile)))
-    error("Could not open batch in file", inFile);
-  if(key && key!=inFile)
-    if(!(keysIn = new std::ifstream(key)))
-      error("Could not open batch key file",key);
-
-  unsigned totalVectors=0;
-  char *thisFile = new char[MAXSTR];
-  char *thisKey = 0;
-  if (key && (key != inFile)) {
-    thisKey = new char[MAXSTR];
-  }
-  char *thisTimesFileName = new char[MAXSTR];
-  char *thisPowerFileName = new char[MAXSTR];
-
-  do {
-    filesIn->getline(thisFile,MAXSTR);
-    if(key && key!=inFile) {
-      keysIn->getline(thisKey,MAXSTR);
-    } else {
-      thisKey = thisFile;
-    }
-    if(usingTimes) {
-      timesFile->getline(thisTimesFileName,MAXSTR);
-    }
-    if(usingPower) {
-      powerFile->getline(thisPowerFileName, MAXSTR);
-    }
-
-    if(filesIn->eof()) {
-      break;
-    }
-    if(usingTimes){
-      if(timesFile->eof()) {
-        error("not enough timestamp files in timesList", timesFileName);
-      }
-    }
-    if (usingPower) {
-      if(powerFile->eof()) {
-        error("not enough power files in powerList", powerFileName);
-      }
-    }
-    adb_insert_t insert;
-    insert.features = thisFile;
-    insert.times = usingTimes ? thisTimesFileName : NULL;
-    insert.power = usingPower ? thisPowerFileName : NULL;
-    insert.key = thisKey;
-    if(audiodb_insert(adb, &insert)) {
-      error("insertion failure", thisFile);
-    }
-  } while(!filesIn->eof());
-
-  VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
-
-  delete [] thisPowerFileName;
-  if(key && (key != inFile)) {
-    delete [] thisKey;
-  }
-  delete [] thisFile;
-  delete [] thisTimesFileName;
-
-  delete filesIn;
-  delete keysIn;
-
-  // Report status
-  status(dbName);
-}
-
-
-// BATCHINSERT_LARGE_ADB
-//
-// This method inserts file pointers into the ADB instance rather than the actual feature data
-//
-// This method is intended for databases that are large enough to only support indexed query
-// So exhaustive searching across all feature vectors will not be performed
-//
-// We insert featureFileName, [powerFileName], [timesFileName]
-//
-// l2norms and power sequence sums are calculated on-the-fly at INDEX and --lsh_exact QUERY time
-//
-// LIMITS:
-//
-// We impose an upper limit of 1M keys, 1M featureFiles, 1M powerFiles and 1M timesFiles
-//
-void audioDB::batchinsert_large_adb(const char* dbName, const char* inFile) {
-
-  if(!key)
-    key=inFile;
-  std::ifstream *filesIn = 0;
-  std::ifstream *keysIn = 0;
-  std::ifstream* thisTimesFile = 0;
-  int thispowerfd = 0;
-
-  if(!(filesIn = new std::ifstream(inFile)))
-    error("Could not open batch in file", inFile);
-  if(key && key!=inFile)
-    if(!(keysIn = new std::ifstream(key)))
-      error("Could not open batch key file",key);
-
-  if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
-    error("Must use timestamps with timestamped database","use --times");
-
-  if(!usingPower && (dbH->flags & O2_FLAG_POWER))
-    error("Must use power with power-enabled database", dbName);
-
-  char *cwd = new char[PATH_MAX];
-
-  if ((getcwd(cwd, PATH_MAX)) == 0) {
-    error("error getting working directory", "", "getcwd");
-  }
-
-  unsigned totalVectors=0;
-  char *thisFile = new char[MAXSTR];
-  char *thisKey = 0;
-  if (key && (key != inFile)) {
-    thisKey = new char[MAXSTR];
-  }
-  char *thisTimesFileName = new char[MAXSTR];
-  char *thisPowerFileName = new char[MAXSTR];
-
-  std::set<std::string> s;
-
-  for (unsigned k = 0; k < dbH->numFiles; k++) {
-    s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
-  }
-
-  do {
-    filesIn->getline(thisFile,MAXSTR);
-    if(key && key!=inFile) {
-      keysIn->getline(thisKey,MAXSTR);
-    } else {
-      thisKey = thisFile;
-    }
-    if(usingTimes) {
-      timesFile->getline(thisTimesFileName,MAXSTR);
-    }
-    if(usingPower) {
-      powerFile->getline(thisPowerFileName, MAXSTR);
-    }
-
-    if(filesIn->eof()) {
-      break;
-    }
-
-    initInputFile(thisFile, false);
-
-    if(!enough_per_file_space_free()) {
-      error("batchinsert failed: no more room for metadata", thisFile);
-    }
-
-    if(s.count(thisKey)) {
-      VERB_LOG(0, "key already exists in database: %s\n", thisKey);
-    } else {
-      s.insert(thisKey);
-      // Make a track index table of features to file indexes
-      unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
-      if(!numVectors) {
-        VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
-      }
-      else{
-	// Check that time-stamp file exists
-	if(usingTimes){
-	  if(timesFile->eof()) {
-	    error("not enough timestamp files in timesList", timesFileName);
-	  }
-	  thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
-	  if(!thisTimesFile->is_open()) {
-	    error("Cannot open timestamp file", thisTimesFileName);
-	  }
-	  if(thisTimesFile)
-	    delete thisTimesFile;
-	}
-
-	// Check that power file exists
-        if (usingPower) {
-          if(powerFile->eof()) {
-            error("not enough power files in powerList", powerFileName);
-          }
-          thispowerfd = open(thisPowerFileName, O_RDONLY);
-          if (thispowerfd < 0) {
-            error("failed to open power file", thisPowerFileName);
-          }
-          if (0 < thispowerfd) {
-            close(thispowerfd);
-          }
-        }
-
-	// persist links to the feature files for reading from filesystem later
-
-	// Primary Keys
-	INSERT_FILETABLE_STRING(fileTable, thisKey);
-
-	if(*thisFile != '/') {
-	  /* FIXME: MAXSTR and O2_FILETABLE_ENTRY_SIZE should probably
-	     be the same thing.  Also, both are related to PATH_MAX,
-	     which admittedly is not always defined or a
-	     constant... */
-	  char tmp[MAXSTR];
-	  strncpy(tmp, thisFile, MAXSTR);
-	  snprintf(thisFile, MAXSTR, "%s/%s", cwd, tmp);
-	}
-	// Feature Vector fileNames
-	INSERT_FILETABLE_STRING(featureFileNameTable, thisFile);
-
-	// Time Stamp fileNames
-	if(usingTimes) {
-	  if(*thisTimesFileName != '/') {
-	    char tmp[MAXSTR];
-	    strncpy(tmp, thisTimesFileName, MAXSTR);
-	    snprintf(thisTimesFileName, MAXSTR, "%s/%s", cwd, tmp);
-	  }
-	  INSERT_FILETABLE_STRING(timesFileNameTable, thisTimesFileName);
-	}
-
-	// Power fileNames
-	if(usingPower) {
-	  if(*thisPowerFileName != '/') {
-	    char tmp[MAXSTR];
-	    strncpy(tmp, thisPowerFileName, MAXSTR);
-	    snprintf(thisPowerFileName, MAXSTR, "%s/%s", cwd, tmp);
-	  }
-	  INSERT_FILETABLE_STRING(powerFileNameTable, thisPowerFileName);
-	}
-
-	// Increment file count
-	dbH->numFiles++;
-
-	// Update Header information
-	dbH->length+=(statbuf.st_size-sizeof(int));
-
-	// Update track to file index map
-	memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
-
-	totalVectors+=numVectors;
-
-	// Copy the header back to the database
-	memcpy (db, dbH, sizeof(dbTableHeaderT));
-      }
-    }
-    // CLEAN UP
-    if(indata)
-      munmap(indata,statbuf.st_size);
-    if(infid>0)
-      close(infid);
-  } while(!filesIn->eof());
-
-  VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
-
-  delete [] thisPowerFileName;
-  if(key && (key != inFile)) {
-    delete [] thisKey;
-  }
-  delete [] thisFile;
-  delete [] thisTimesFileName;
-
-  delete filesIn;
-  delete keysIn;
-
-  // Report status
-  status(dbName);
-}