changeset 409:99e6cbad7f76 api-inversion

The lesser of two evils, part 2. Implement paths through audiodb_insert_datum_internal() for databases with O2_FLAG_LARGE_ADB, including in some of the helper functions. Most of the nasty stuff is concentrated in writing out the paths in what is now step 6, and everything else looks much as before, apart from a renumbering of the steps taken. Now we can implement audiodb_insert() for O2_FLAG_LARGE_ADB databases; we need to construct an adb_datum_internal_t from our adb_insert_t, but that's straightforward -- even just about straightforward enough to do it inline. Then audioDB::batchinsert() can be rewritten completely in terms of API functions, and doesn't need any kind of special treatment for the large case. Hooray. The real point of that is of course that we can now delete wodges of dead code, and move out audioDB::insert and audioDB::batchinsert into audioDB.cpp, because all they're doing now is dealing with command-line logic. This point marks the limit of what can be achieved in terms of "API inversion" at this time; the only remaining function, audiodb_query() / audioDB::query cannot be inverted because its API implementation is incomplete. Future plans, in some order: - merge this branch to trunk (check with current API/ABI clients); - complete audiodb_query() implementation; - invert audioDB::query / audiodb_query(); - MORE TESTS; - remove audioDB.cpp from list of files compiled into the library; - implement missing API functions (index, liszt, sample) directly; - source code rearrangement into library and command-line directories; - include bindings to library for some plausible candidate environments (Perl, Python, Lisp, Pd, Max/MSP) as examples; - API documentation.
author mas01cr
date Tue, 09 Dec 2008 22:48:30 +0000
parents f0a69693eaef
children d7e590d58c85
files audioDB.cpp audioDB.h insert.cpp
diffstat 3 files changed, 232 insertions(+), 356 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB.cpp	Tue Dec 09 20:53:39 2008 +0000
+++ b/audioDB.cpp	Tue Dec 09 22:48:30 2008 +0000
@@ -733,6 +733,108 @@
   status(dbName);
 }
 
+void audioDB::insert(const char* dbName, const char* inFile) { // insert the single feature file inFile into database dbName through audiodb_insert()
+  if(!adb) { // reuse an existing handle; otherwise open dbName read-write
+    if(!(adb = audiodb_open(dbName, O_RDWR))) {
+      error("failed to open database", dbName); // NOTE(review): presumably does not return, since adb is still null here -- confirm
+    }
+  }
+
+  /* audiodb_insert() is handed file names rather than open handles,
+   * so pass inFile, timesFileName and powerFileName; whatever open
+   * handles the object holds (powerfd, timesFile) are not used here. */
+  adb_insert_t insert;
+  insert.features = inFile;
+  insert.times = timesFileName;
+  insert.power = powerFileName;
+  insert.key = key; // passed through as-is; no fallback to inFile is applied in this function
+
+  if(audiodb_insert(adb, &insert)) { // any non-zero return is reported through error()
+    error("insertion failure", inFile);
+  }
+  status(dbName); // report status once the insert has been attempted
+}
+
+void audioDB::batchinsert(const char* dbName, const char* inFile) { // insert every feature file named (one per line) in the list file inFile
+  if(!adb) { // reuse an existing handle; otherwise open dbName read-write
+    if(!(adb = audiodb_open(dbName, O_RDWR))) {
+      error("failed to open database", dbName);
+    }
+  }
+
+  if(!key)
+    key=inFile; // no key list: alias key to inFile so the (key != inFile) tests below pick the "feature name is the key" path
+  std::ifstream *filesIn = 0;
+  std::ifstream *keysIn = 0;
+
+  if(!(filesIn = new std::ifstream(inFile))) // NOTE(review): a plain new-expression throws instead of yielding null, so this cannot catch a failed open; is_open() is never tested
+    error("Could not open batch in file", inFile);
+  if(key && key!=inFile)
+    if(!(keysIn = new std::ifstream(key))) // same caveat as for filesIn
+      error("Could not open batch key file",key);
+  // NOTE(review): totalVectors is never modified in this function, so the VERB_LOG call below logs 0 vectors
+  unsigned totalVectors=0;
+  char *thisFile = new char[MAXSTR]; // line buffers, one per list file
+  char *thisKey = 0;
+  if (key && (key != inFile)) {
+    thisKey = new char[MAXSTR]; // allocated only when keys come from their own list
+  }
+  char *thisTimesFileName = new char[MAXSTR];
+  char *thisPowerFileName = new char[MAXSTR];
+
+  do { // each pass reads one line from every list in use, then inserts that entry
+    filesIn->getline(thisFile,MAXSTR);
+    if(key && key!=inFile) {
+      keysIn->getline(thisKey,MAXSTR);
+    } else {
+      thisKey = thisFile; // alias, not a copy: the feature file name doubles as the key
+    }
+    if(usingTimes) {
+      timesFile->getline(thisTimesFileName,MAXSTR); // timesFile is not opened in this function
+    }
+    if(usingPower) {
+      powerFile->getline(thisPowerFileName, MAXSTR); // powerFile is not opened in this function
+    }
+    
+    if(filesIn->eof()) { // NOTE(review): getline also sets eofbit on a final line with no trailing newline, so such a line would be dropped -- confirm intended
+      break;
+    }
+    if(usingTimes){
+      if(timesFile->eof()) {
+        error("not enough timestamp files in timesList", timesFileName);
+      }
+    }
+    if (usingPower) {
+      if(powerFile->eof()) {
+        error("not enough power files in powerList", powerFileName);
+      }
+    }
+    adb_insert_t insert;
+    insert.features = thisFile;
+    insert.times = usingTimes ? thisTimesFileName : NULL; // NULL when the corresponding list is not in use
+    insert.power = usingPower ? thisPowerFileName : NULL;
+    insert.key = thisKey;
+    if(audiodb_insert(adb, &insert)) { // any non-zero return is reported through error()
+      error("insertion failure", thisFile);
+    }
+  } while(!filesIn->eof());
+  // NOTE(review): dbH is not set up anywhere in this function -- confirm it is valid when dereferenced below
+  VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
+
+  delete [] thisPowerFileName;
+  if(key && (key != inFile)) { // thisKey owns a buffer only in this case; otherwise it aliases thisFile
+    delete [] thisKey;
+  }
+  delete [] thisFile;
+  delete [] thisTimesFileName;
+  
+  delete filesIn;
+  delete keysIn; // still null when no separate key list was opened
+
+  // Report status
+  status(dbName);
+}
+
 // This entry point is visited once per instance
 // so it is a good place to set any global state variables
 int main(const int argc, const char* argv[]){
--- a/audioDB.h	Tue Dec 09 20:53:39 2008 +0000
+++ b/audioDB.h	Tue Dec 09 22:48:30 2008 +0000
@@ -355,10 +355,8 @@
   void get_lock(int fd, bool exclusive);
   void release_lock(int fd);
   void create(const char* dbName);
-  bool enough_per_file_space_free();
   void insert(const char* dbName, const char* inFile);
   void batchinsert(const char* dbName, const char* inFile);
-  void batchinsert_large_adb(const char* dbName, const char* inFile);
   void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
   void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
 
--- a/insert.cpp	Tue Dec 09 20:53:39 2008 +0000
+++ b/insert.cpp	Tue Dec 09 22:48:30 2008 +0000
@@ -6,11 +6,15 @@
 
 static bool audiodb_enough_data_space_free(adb_t *adb, off_t size) {
   adb_header_t *header = adb->header;
-  /* FIXME: timesTableOffset isn't necessarily the next biggest offset
-     after dataOffset.  Maybe make the offsets into an array that we
-     can iterate over... */
-  return (header->timesTableOffset > 
-          header->dataOffset + header->length + size);
+  if(header->flags & O2_FLAG_LARGE_ADB) { // decides whether `size` more bytes of data may be added; large databases are exempt
+    return true; // `size` is not compared against anything on this path
+  } else { // otherwise the appended data must end before timesTableOffset
+    /* FIXME: timesTableOffset isn't necessarily the next biggest
+     * offset after dataOffset.  Maybe make the offsets into an array
+     * that we can iterate over... */
+    return (header->timesTableOffset > 
+            (header->dataOffset + header->length + size));
+  }
 }
 
 static bool audiodb_enough_per_file_space_free(adb_t *adb) {
@@ -22,8 +26,24 @@
   int fmaxfiles = file_table_length / O2_FILETABLE_ENTRY_SIZE;
   int tmaxfiles = track_table_length / O2_TRACKTABLE_ENTRY_SIZE;
   /* maxfiles is the _minimum_ of the two.  Do not be confused... */
-  unsigned int maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
-  return (header->numFiles < maxfiles);
+  int maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
+  if(header->flags & O2_FLAG_LARGE_ADB) {
+    /* by default, these tables are created with the same size as the
+     * fileTable (which should be called key_table); relying on that
+     * always being the case, though, smacks of optimism, so instead
+     * we code defensively... */
+    off_t data_table_length = header->timesTableOffset - header->dataOffset;
+    off_t times_table_length = header->powerTableOffset - header->timesTableOffset;
+    off_t power_table_length = header->dbSize - header->powerTableOffset;
+    int dmaxfiles = data_table_length / O2_FILETABLE_ENTRY_SIZE;
+    int timaxfiles = times_table_length / O2_FILETABLE_ENTRY_SIZE;
+    int pmaxfiles = power_table_length / O2_FILETABLE_ENTRY_SIZE;
+    /* ... even though it means a certain amount of tedium. */
+    maxfiles = maxfiles > dmaxfiles ? dmaxfiles : maxfiles;
+    maxfiles = maxfiles > timaxfiles ? timaxfiles : maxfiles;
+    maxfiles = maxfiles > pmaxfiles ? pmaxfiles : maxfiles;
+  }
+  return (header->numFiles < (unsigned int) maxfiles);
 }
 
 /*
@@ -33,28 +53,28 @@
  * datum into the database, we:
  *
  *  1. check write permission;
- *  2. check !O2_FLAG_LARGE_ADB;
- *  3. check for enough space;
- *  4. check that datum->dim and adb->header->dim agree (or that the
+ *  2. check for enough space;
+ *  3. check that datum->dim and adb->header->dim agree (or that the
  *     header dimension is zero, in which case write datum->dim to
  *     adb->header->dim).
- *  5. check for presence of datum->key in adb->keys;
- *  6. check for consistency between power and O2_FLAG_POWER, and 
+ *  4. check for presence of datum->key in adb->keys;
+ *  5. check for consistency between power and O2_FLAG_POWER, and 
  *     times and O2_FLAG_TIMES;
- *  7. write in data, power, times as appropriate; add to track
+ *  6. write in data, power, times as appropriate; add to track
  *     and key tables too;
- *  8. if O2_FLAG_L2NORM, compute norms and fill in table;
- *  9. update adb->keys and adb->header;
- * 10. sync adb->header with disk.
+ *  7. if O2_FLAG_L2NORM and !O2_FLAG_LARGE_ADB, compute norms and fill
+ *     in table;
+ *  8. update adb->keys and adb->header;
+ *  9. sync adb->header with disk.
  *
- * Step 10 essentially commits the transaction; until we update
+ * Step 9 essentially commits the transaction; until we update
  * header->length, nothing will recognize the newly-written data.  In
  * principle, if it fails, we should roll back, which we can in fact
- * do on the assumption that nothing in step 9 can ever fail; on the
+ * do on the assumption that nothing in step 8 can ever fail; on the
  * other hand, if it's failed, then it's unlikely that rolling back by
  * syncing the original header back to disk is going to work
  * desperately well.  We should perhaps take an operating-system lock
- * around step 10, so that we can't be interrupted part-way through
+ * around step 9, so that we can't be interrupted part-way through
  * (except of course for SIGKILL, but if we're hit with that we will
  * always lose).
  */
@@ -67,11 +87,7 @@
   if(!(adb->flags & O_RDWR)) {
     return 1;
   }
-  /* 2. check !O2_FLAG_LARGE_ADB; */
-  if(adb->header->flags & O2_FLAG_LARGE_ADB) {
-    return 1;
-  }
-  /* 3. check for enough space; */
+  /* 2. check for enough space; */
   size = sizeof(double) * datum->nvectors * datum->dim;
   if(!audiodb_enough_data_space_free(adb, size)) {
     return 1;
@@ -79,7 +95,7 @@
   if(!audiodb_enough_per_file_space_free(adb)) {
     return 1;
   }
-  /* 4. check that datum->dim and adb->header->dim agree (or that the
+  /* 3. check that datum->dim and adb->header->dim agree (or that the
    *    header dimension is zero, in which case write datum->dim to
    *    adb->header->dim).
    */
@@ -88,14 +104,14 @@
   } else if (adb->header->dim != datum->dim) {
     return 1;
   }
-  /* 5. check for presence of datum->key in adb->keys; */
+  /* 4. check for presence of datum->key in adb->keys; */
   if(adb->keys->count(datum->key)) {
     /* not part of an explicit API/ABI, but we need a distinguished
        value in this circumstance to preserve somewhat wonky behaviour
        of audioDB::batchinsert. */
     return 2;
   }
-  /* 6. check for consistency between power and O2_FLAG_POWER, and
+  /* 5. check for consistency between power and O2_FLAG_POWER, and
    *    times and O2_FLAG_TIMES; 
    */
   if((datum->power && !(adb->header->flags & O2_FLAG_POWER)) ||
@@ -111,30 +127,62 @@
   } else if ((adb->header->flags & O2_FLAG_TIMES) && !datum->times) {
     return 1;
   }
-  /* 7. write in data, power, times as appropriate; add to track
+  /* 6. write in data, power, times as appropriate; add to track
    *    and key tables too;
    */
   offset = adb->header->length;
   nfiles = adb->header->numFiles;
 
   /* FIXME: checking for all these lseek()s and write()s */
-  lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET);
-  write(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim);
-  if(datum->power) {
-    lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET);
-    write(adb->fd, datum->power, sizeof(double) * datum->nvectors);
-  }
-  if(datum->times) {
-    lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET);
-    write(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2);
-  }
+  lseek(adb->fd, adb->header->fileTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
+  write(adb->fd, datum->key, strlen(datum->key)+1);
   lseek(adb->fd, adb->header->trackTableOffset + nfiles * O2_TRACKTABLE_ENTRY_SIZE, SEEK_SET);
   write(adb->fd, &datum->nvectors, O2_TRACKTABLE_ENTRY_SIZE);
-  lseek(adb->fd, adb->header->fileTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
-  write(adb->fd, datum->key, strlen(datum->key)+1);
+  if(adb->header->flags & O2_FLAG_LARGE_ADB) {
+    char cwd[PATH_MAX];
+    char slash = '/';
 
-  /* 8. if O2_FLAG_L2NORM, compute norms and fill in table; */
-  if(adb->header->flags & O2_FLAG_L2NORM) {
+    getcwd(cwd, PATH_MAX);
+    lseek(adb->fd, adb->header->dataOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
+    if(*((char *) datum->data) != '/') {
+      write(adb->fd, cwd, strlen(cwd));
+      write(adb->fd, &slash, 1);
+    }
+    write(adb->fd, datum->data, strlen((const char *) datum->data)+1);
+    if(datum->power) {
+      lseek(adb->fd, adb->header->powerTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
+      if(*((char *) datum->power) != '/') {
+        write(adb->fd, cwd, strlen(cwd));
+        write(adb->fd, &slash, 1);
+      }
+      write(adb->fd, datum->power, strlen((const char *) datum->power)+1);
+    }
+    if(datum->times) {
+      lseek(adb->fd, adb->header->timesTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
+      if(*((char *) datum->times) != '/') {
+        write(adb->fd, cwd, strlen(cwd));
+        write(adb->fd, &slash, 1);
+      }
+      write(adb->fd, datum->times, strlen((const char *) datum->times)+1);
+    }
+  } else {
+    lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET);
+    write(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim);
+    if(datum->power) {
+      lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET);
+      write(adb->fd, datum->power, sizeof(double) * datum->nvectors);
+    }
+    if(datum->times) {
+      lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET);
+      write(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2);
+    }
+  }
+
+  /* 7. if O2_FLAG_L2NORM and !O2_FLAG_LARGE_ADB, compute norms and fill
+   *    in table;
+   */
+  if((adb->header->flags & O2_FLAG_L2NORM) &&
+     !(adb->header->flags & O2_FLAG_LARGE_ADB)) {
     l2norm_buffer = (double *) malloc(datum->nvectors * sizeof(double));
     
     /* FIXME: shared code with audiodb_norm_existing() */
@@ -153,10 +201,12 @@
     free(l2norm_buffer);
   }
 
+  /* 8. update adb->keys and adb->header; */
   adb->keys->insert(datum->key);
   adb->header->numFiles += 1;
   adb->header->length += sizeof(double) * datum->nvectors * datum->dim;
 
+  /* 9. sync adb->header with disk. */
   return audiodb_sync_header(adb);
 
  error:
@@ -174,16 +224,6 @@
   return audiodb_insert_datum_internal(adb, &d);
 }
 
-bool audioDB::enough_per_file_space_free() {
-  unsigned int fmaxfiles, tmaxfiles;
-  unsigned int maxfiles;
-
-  fmaxfiles = fileTableLength / O2_FILETABLE_ENTRY_SIZE;
-  tmaxfiles = trackTableLength / O2_TRACKTABLE_ENTRY_SIZE;
-  maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
-  return(dbH->numFiles < maxfiles);
-}
-
 static int audiodb_free_datum(adb_datum_t *datum) {
   if(datum->data) {
     free(datum->data);
@@ -287,7 +327,43 @@
 
 int audiodb_insert(adb_t *adb, adb_insert_t *insert) {
   if(adb->header->flags & O2_FLAG_LARGE_ADB) {
-    return 1;
+    adb_datum_internal_t d;
+    struct stat st;
+    int fd;
+    int err;
+    off_t size;
+    
+    if((fd = open(insert->features, O_RDONLY)) == -1) {
+      return 1;
+    }
+    if(fstat(fd, &st)) {
+      return 1;
+    }
+    read(fd, &(d.dim), sizeof(uint32_t));
+    close(fd);
+    size = st.st_size - sizeof(uint32_t);
+    d.nvectors = size / (sizeof(double) * d.dim);
+    d.data = (void *) insert->features;
+    if(insert->power) {
+      if(stat(insert->power, &st)) {
+        return 1;
+      }
+    }
+    d.power = (void *) insert->power;
+    if(insert->times) {
+      if(stat(insert->times, &st)) {
+        return 1;
+      }
+    }
+    d.times = (void *) insert->times;
+    d.key = insert->key ? insert->key : insert->features;
+    err = audiodb_insert_datum_internal(adb, &d);
+
+    if(err == 2) {
+      return 0;
+    } else {
+      return err;
+    }
   } else {
     adb_datum_t datum;
     int err;
@@ -300,8 +376,7 @@
 
     if(err == 2) {
       return 0;
-    }
-    else {
+    } else {
       return err;
     }
   }
@@ -316,302 +391,3 @@
   }
   return 0;
 }
-
-void audioDB::insert(const char* dbName, const char* inFile) {
-  if(!adb) {
-    if(!(adb = audiodb_open(dbName, O_RDWR))) {
-      error("failed to open database", dbName);
-    }
-  }
-
-  /* at this point, we have powerfd (an fd), timesFile (a
-   * std::ifstream *) and inFile (a char *).  Wacky, huh?  Ignore
-   * the wackiness and just use the names. */
-  adb_insert_t insert;
-  insert.features = inFile;
-  insert.times = timesFileName;
-  insert.power = powerFileName;
-  insert.key = key;
-
-  if(audiodb_insert(adb, &insert)) {
-    error("insertion failure", inFile);
-  }
-  status(dbName);
-}
-
-void audioDB::batchinsert(const char* dbName, const char* inFile) {
-  forWrite = true;
-  initDBHeader(dbName);
-
-  // Treat large ADB instances differently
-  if( dbH->flags & O2_FLAG_LARGE_ADB ){
-    batchinsert_large_adb(dbName, inFile) ;
-    return;
-  }
-    
-  if(!key)
-    key=inFile;
-  std::ifstream *filesIn = 0;
-  std::ifstream *keysIn = 0;
-
-  if(!(filesIn = new std::ifstream(inFile)))
-    error("Could not open batch in file", inFile);
-  if(key && key!=inFile)
-    if(!(keysIn = new std::ifstream(key)))
-      error("Could not open batch key file",key);
-
-  unsigned totalVectors=0;
-  char *thisFile = new char[MAXSTR];
-  char *thisKey = 0;
-  if (key && (key != inFile)) {
-    thisKey = new char[MAXSTR];
-  }
-  char *thisTimesFileName = new char[MAXSTR];
-  char *thisPowerFileName = new char[MAXSTR];
-
-  do {
-    filesIn->getline(thisFile,MAXSTR);
-    if(key && key!=inFile) {
-      keysIn->getline(thisKey,MAXSTR);
-    } else {
-      thisKey = thisFile;
-    }
-    if(usingTimes) {
-      timesFile->getline(thisTimesFileName,MAXSTR);
-    }
-    if(usingPower) {
-      powerFile->getline(thisPowerFileName, MAXSTR);
-    }
-    
-    if(filesIn->eof()) {
-      break;
-    }
-    if(usingTimes){
-      if(timesFile->eof()) {
-        error("not enough timestamp files in timesList", timesFileName);
-      }
-    }
-    if (usingPower) {
-      if(powerFile->eof()) {
-        error("not enough power files in powerList", powerFileName);
-      }
-    }
-    adb_insert_t insert;
-    insert.features = thisFile;
-    insert.times = usingTimes ? thisTimesFileName : NULL;
-    insert.power = usingPower ? thisPowerFileName : NULL;
-    insert.key = thisKey;
-    if(audiodb_insert(adb, &insert)) {
-      error("insertion failure", thisFile);
-    }
-  } while(!filesIn->eof());
-
-  VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
-
-  delete [] thisPowerFileName;
-  if(key && (key != inFile)) {
-    delete [] thisKey;
-  }
-  delete [] thisFile;
-  delete [] thisTimesFileName;
-  
-  delete filesIn;
-  delete keysIn;
-
-  // Report status
-  status(dbName);
-}
-
-
-// BATCHINSERT_LARGE_ADB
-//
-// This method inserts file pointers into the ADB instance rather than the actual feature data
-//
-// This method is intended for databases that are large enough to only support indexed query
-// So exhaustive searching across all feature vectors will not be performed
-//
-// We insert featureFileName, [powerFileName], [timesFileName]
-//
-// l2norms and power sequence sums are calculated on-the-fly at INDEX and --lsh_exact QUERY time
-//
-// LIMITS:
-//
-// We impose an upper limit of 1M keys, 1M featureFiles, 1M powerFiles and 1M timesFiles
-//
-void audioDB::batchinsert_large_adb(const char* dbName, const char* inFile) {
-
-  if(!key)
-    key=inFile;
-  std::ifstream *filesIn = 0;
-  std::ifstream *keysIn = 0;
-  std::ifstream* thisTimesFile = 0;
-  int thispowerfd = 0;
-
-  if(!(filesIn = new std::ifstream(inFile)))
-    error("Could not open batch in file", inFile);
-  if(key && key!=inFile)
-    if(!(keysIn = new std::ifstream(key)))
-      error("Could not open batch key file",key);
-  
-  if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
-    error("Must use timestamps with timestamped database","use --times");
-
-  if(!usingPower && (dbH->flags & O2_FLAG_POWER))
-    error("Must use power with power-enabled database", dbName);
-
-  char *cwd = new char[PATH_MAX];
-
-  if ((getcwd(cwd, PATH_MAX)) == 0) {
-    error("error getting working directory", "", "getcwd");
-  }
-
-  unsigned totalVectors=0;
-  char *thisFile = new char[MAXSTR];
-  char *thisKey = 0;
-  if (key && (key != inFile)) {
-    thisKey = new char[MAXSTR];
-  }
-  char *thisTimesFileName = new char[MAXSTR];
-  char *thisPowerFileName = new char[MAXSTR];
-
-  std::set<std::string> s;
-
-  for (unsigned k = 0; k < dbH->numFiles; k++) {
-    s.insert(fileTable + k*O2_FILETABLE_ENTRY_SIZE);
-  }
-
-  do {
-    filesIn->getline(thisFile,MAXSTR);
-    if(key && key!=inFile) {
-      keysIn->getline(thisKey,MAXSTR);
-    } else {
-      thisKey = thisFile;
-    }
-    if(usingTimes) {
-      timesFile->getline(thisTimesFileName,MAXSTR);
-    }
-    if(usingPower) {
-      powerFile->getline(thisPowerFileName, MAXSTR);
-    }
-    
-    if(filesIn->eof()) {
-      break;
-    }
-    
-    initInputFile(thisFile, false);
-
-    if(!enough_per_file_space_free()) {
-      error("batchinsert failed: no more room for metadata", thisFile);
-    }
-
-    if(s.count(thisKey)) {
-      VERB_LOG(0, "key already exists in database: %s\n", thisKey);
-    } else {
-      s.insert(thisKey);
-      // Make a track index table of features to file indexes
-      unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
-      if(!numVectors) {
-        VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
-      }
-      else{
-	// Check that time-stamp file exists
-	if(usingTimes){
-	  if(timesFile->eof()) {
-	    error("not enough timestamp files in timesList", timesFileName);
-	  }
-	  thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
-	  if(!thisTimesFile->is_open()) {
-	    error("Cannot open timestamp file", thisTimesFileName);
-	  }
-	  if(thisTimesFile)
-	    delete thisTimesFile;
-	}
-
-	// Check that power file exists        
-        if (usingPower) {
-          if(powerFile->eof()) {
-            error("not enough power files in powerList", powerFileName);
-          }
-          thispowerfd = open(thisPowerFileName, O_RDONLY);
-          if (thispowerfd < 0) {
-            error("failed to open power file", thisPowerFileName);
-          }
-          if (0 < thispowerfd) {
-            close(thispowerfd);
-          }
-        }
-
-	// persist links to the feature files for reading from filesystem later
-	
-	// Primary Keys
-	INSERT_FILETABLE_STRING(fileTable, thisKey);
-
-	if(*thisFile != '/') {
-	  /* FIXME: MAXSTR and O2_FILETABLE_ENTRY_SIZE should probably
-	     be the same thing.  Also, both are related to PATH_MAX,
-	     which admittedly is not always defined or a
-	     constant... */
-	  char tmp[MAXSTR];
-	  strncpy(tmp, thisFile, MAXSTR);
-	  snprintf(thisFile, MAXSTR, "%s/%s", cwd, tmp);
-	}
-	// Feature Vector fileNames
-	INSERT_FILETABLE_STRING(featureFileNameTable, thisFile);
-	
-	// Time Stamp fileNames
-	if(usingTimes) {
-	  if(*thisTimesFileName != '/') {
-	    char tmp[MAXSTR];
-	    strncpy(tmp, thisTimesFileName, MAXSTR);
-	    snprintf(thisTimesFileName, MAXSTR, "%s/%s", cwd, tmp);
-	  }
-	  INSERT_FILETABLE_STRING(timesFileNameTable, thisTimesFileName);
-	}
-
-	// Power fileNames
-	if(usingPower) {
-	  if(*thisPowerFileName != '/') {
-	    char tmp[MAXSTR];
-	    strncpy(tmp, thisPowerFileName, MAXSTR);
-	    snprintf(thisPowerFileName, MAXSTR, "%s/%s", cwd, tmp);
-	  }
-	  INSERT_FILETABLE_STRING(powerFileNameTable, thisPowerFileName);
-	}
-
-	// Increment file count
-	dbH->numFiles++;  
-  
-	// Update Header information
-	dbH->length+=(statbuf.st_size-sizeof(int));
-  
-	// Update track to file index map
-	memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));  
-
-	totalVectors+=numVectors;
-
-	// Copy the header back to the database
-	memcpy (db, dbH, sizeof(dbTableHeaderT));  
-      }
-    }
-    // CLEAN UP
-    if(indata)
-      munmap(indata,statbuf.st_size);
-    if(infid>0)
-      close(infid);
-  } while(!filesIn->eof());
-
-  VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
-
-  delete [] thisPowerFileName;
-  if(key && (key != inFile)) {
-    delete [] thisKey;
-  }
-  delete [] thisFile;
-  delete [] thisTimesFileName;
-  
-  delete filesIn;
-  delete keysIn;
-
-  // Report status
-  status(dbName);
-}