changeset 401:a8a5f2ca5380 api-inversion

Invert audioDB::l2norm / audiodb_l2norm(). We now have some functions that shouldn't be exported to the user of the library, but are used in more than one source file; case in point: audiodb_sync_header(). Make a new audioDB-internals.h file for them, and while we're at it put the scary mmap()-related macros in there too. We can't delete audioDB::unitNormAndInsertL2() quite yet, because it's used in insertion too, but we can delete the non-append branches. That's not very much code to lose, but every little helps.
author mas01cr
date Wed, 03 Dec 2008 14:53:20 +0000
parents 8c7453fb5bd9
children 58b88ab69424
files Makefile audioDB-internals.h audioDB.cpp audioDB.h audioDB_API.h dump.cpp insert.cpp l2norm.cpp power.cpp
diffstat 9 files changed, 133 insertions(+), 91 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Thu Nov 27 16:22:52 2008 +0000
+++ b/Makefile	Wed Dec 03 14:53:20 2008 +0000
@@ -8,7 +8,7 @@
 
 SHARED_LIB_FLAGS=-shared -Wl,-soname,
 
-LIBOBJS=insert.o create.o common.o open.o close.o status.o dump.o power.o liszt.o query.o sample.o index.o lshlib.o cmdline.o
+LIBOBJS=insert.o create.o common.o open.o close.o status.o dump.o power.o l2norm.o liszt.o query.o sample.o index.o lshlib.o cmdline.o
 OBJS=$(LIBOBJS) soap.o audioDB.o
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/audioDB-internals.h	Wed Dec 03 14:53:20 2008 +0000
@@ -0,0 +1,63 @@
+#ifndef AUDIODB_INTERNALS_H
+#define AUDIODB_INTERNALS_H
+
+/* Helpers shared between the library's source files; not part of the
+ * public API.  This header includes nothing itself: whoever includes
+ * it must already have mmap(), munmap(), lseek(), write(),
+ * fdatasync(), adb_t and O2_HEADERSIZE declared. */
+
+/* Map `length` bytes of the database file (adb->fd) read-only, starting
+ * at file offset `start`, and assign the mapping to `var` cast to
+ * `type`.  Jumps to the `error` label if mmap() fails, so the site of
+ * use needs both an `adb` variable and an `error:` label in scope.
+ *
+ * We could go gcc-specific here and use typeof() instead of passing
+ * in an explicit type.  Answers on a postcard as to whether that's a
+ * good plan or not. */
+#define mmap_or_goto_error(type, var, start, length) \
+  { void *mmap_tmp_ = mmap(0, (length), PROT_READ, MAP_SHARED, adb->fd, (start)); \
+    if(mmap_tmp_ == MAP_FAILED) { \
+      goto error; \
+    } \
+    (var) = (type) mmap_tmp_; \
+  }
+
+/* munmap() `length` bytes at `table`, unless `table` is null. */
+#define maybe_munmap(table, length) \
+  { if(table) { \
+      munmap((table), (length)); \
+    } \
+  }
+
+/* Write the in-memory header (adb->header, O2_HEADERSIZE bytes) at
+ * offset 0 of adb->fd, ask for it to be flushed, and then put the file
+ * offset back where it was on entry.  Returns 0 on success and 1 on
+ * failure. */
+static inline int audiodb_sync_header(adb_t *adb) {
+  int status = 0;
+  off_t pos = lseek(adb->fd, (off_t) 0, SEEK_CUR);
+  if(pos == (off_t) -1) {
+    return 1;
+  }
+  if(lseek(adb->fd, (off_t) 0, SEEK_SET) == (off_t) -1) {
+    return 1;
+  }
+
+  if(write(adb->fd, adb->header, O2_HEADERSIZE) != O2_HEADERSIZE) {
+    status = 1;
+  } else {
+    /* can be fsync() if fdatasync() is racily exciting and new */
+    /* NOTE(review): the result is not checked, as before -- confirm
+     * that a failed flush really should go unreported. */
+    fdatasync(adb->fd);
+  }
+
+  /* Restore the caller's offset even if the write failed, so a failed
+   * sync does not leave the descriptor pointing into the header. */
+  if(lseek(adb->fd, pos, SEEK_SET) == (off_t) -1) {
+    status = 1;
+  }
+  return status;
+}
+
+#endif /* AUDIODB_INTERNALS_H */
--- a/audioDB.cpp	Thu Nov 27 16:22:52 2008 +0000
+++ b/audioDB.cpp	Wed Dec 03 14:53:20 2008 +0000
@@ -757,17 +757,14 @@
 }
 
 void audioDB::l2norm(const char* dbName) {
-  forWrite = true;
-  initTables(dbName, 0);
-  if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){
-    /* FIXME: should probably be uint64_t */
-    unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
-    CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
-    unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append
+  if(!adb) {
+    if(!(adb = audiodb_open(dbName, O_RDWR))) {
+      error("Failed to open database file", dbName);
+    }
   }
-  // Update database flags
-  dbH->flags = dbH->flags|O2_FLAG_L2NORM;
-  memcpy (db, dbH, O2_HEADERSIZE);
+  if(audiodb_l2norm(adb)) {
+    error("failed to turn on l2norm flag for database", dbName);
+  }
 }
 
 void audioDB::power_flag(const char *dbName) {
@@ -806,16 +803,13 @@
 /* FIXME: in fact this does not unit norm a block of features, it just
    records the L2 norms somewhere.  unitNorm() does in fact unit norm
    a block of features. */
-void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){
+void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n){
   unsigned d;
   double *p;
   unsigned nn = n;
 
   assert(l2normTable);
 
-  if( !(dbH->flags & O2_FLAG_LARGE_ADB) && !append && (dbH->flags & O2_FLAG_L2NORM) )
-    error("Database is already L2 normed", "automatic norm on insert is enabled");
-
   VERB_LOG(2, "norming %u vectors...", n);
 
   double* l2buf = new double[n];
@@ -835,15 +829,12 @@
     X+=dim;
   }
   unsigned offset;
-  if(append) {
-    // FIXME: a hack, a very palpable hack: the vectors have already
-    // been inserted, and dbH->length has already been updated.  We
-    // need to subtract off again the number of vectors that we've
-    // inserted this time...
-    offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors
-  } else {
-    offset=0;
-  }
+
+  // FIXME: a hack, a very palpable hack: the vectors have already
+  // been inserted, and dbH->length has already been updated.  We
+  // need to subtract off again the number of vectors that we've
+  // inserted this time...
+  offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors
   memcpy(l2normTable+offset, l2buf, n*sizeof(double));
   if(l2buf)
     delete[] l2buf;
@@ -1140,20 +1131,5 @@
 
     return apierror;
   }
-
-  int audiodb_l2norm(adb_ptr mydb){
-
-      const char *argv[5];
-      int apierror=0;
-
-      argv[0]="audioDB";
-      argv[1]="--L2NORM";
-      argv[2]="-d";
-      argv[3]=mydb->path;
-      argv[4]='\0';
-
-      audioDB::audioDB(4,argv,&apierror,mydb);
-      return apierror;
-  }
 }
 
--- a/audioDB.h	Thu Nov 27 16:22:52 2008 +0000
+++ b/audioDB.h	Wed Dec 03 14:53:20 2008 +0000
@@ -336,7 +336,7 @@
   void initTables(const char* dbName, const char* inFile = 0);
   void initTablesFromKey(const char* dbName, const Uns32T queryIndex);
   void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
-  void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
+  void unitNormAndInsertL2(double* X, unsigned dim, unsigned n);
   void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata);
   void insertPowerData(unsigned n, int powerfd, double *powerdata);
   unsigned getKeyPos(char* key);
--- a/audioDB_API.h	Thu Nov 27 16:22:52 2008 +0000
+++ b/audioDB_API.h	Wed Dec 03 14:53:20 2008 +0000
@@ -5,33 +5,13 @@
  * Christophe Rhodes c.rhodes@gold.ac.uk
  * Ian Knopke mas01ik@gold.ac.uk, ian.knopke@gmail.com */
 
-
-/*******************************************************************/
-/* Data types for API */
-
 /* Temporary workarounds */
 typedef struct dbTableHeader adb_header_t;
 int acquire_lock(int, bool);
 
-/* Internal Macros: make sure they end up in the private version of
-   this header. */
 
-/* We could go gcc-specific here and use typeof() instead of passing
- * in an explicit type.  Answers on a postcard as to whether that's a
- * good plan or not. */
-#define mmap_or_goto_error(type, var, start, length) \
-  { void *tmp = mmap(0, length, PROT_READ, MAP_SHARED, adb->fd, (start)); \
-    if(tmp == (void *) -1) { \
-      goto error; \
-    } \
-    var = (type) tmp; \
-  }
-
-#define maybe_munmap(table, length) \
-  { if(table) { \
-      munmap(table, length); \
-    } \
-  }
+/*******************************************************************/
+/* Data types for API */
 
 /* The main struct that stores the name of the database, and in future will hold all
  * kinds of other interesting information */
@@ -139,5 +119,3 @@
 
 /* varoius dump formats */
 int audiodb_dump(adb_ptr mydb, const char *outputdir);
-
-
--- a/dump.cpp	Thu Nov 27 16:22:52 2008 +0000
+++ b/dump.cpp	Wed Dec 03 14:53:20 2008 +0000
@@ -1,6 +1,7 @@
 #include "audioDB.h"
 extern "C" {
 #include "audioDB_API.h"
+#include "audioDB-internals.h"
 }
 
 int audiodb_dump(adb_t *adb, const char *output) {
--- a/insert.cpp	Thu Nov 27 16:22:52 2008 +0000
+++ b/insert.cpp	Wed Dec 03 14:53:20 2008 +0000
@@ -102,7 +102,7 @@
   
   // Norm the vectors on input if the database is already L2 normed
   if(dbH->flags & O2_FLAG_L2NORM)
-    unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
+    unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors);
 
   // Report status
   status(dbName);
@@ -319,7 +319,7 @@
 	
 	// Norm the vectors on input if the database is already L2 normed
 	if(dbH->flags & O2_FLAG_L2NORM)
-	  unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
+	  unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors);
 	
 	totalVectors+=numVectors;
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/l2norm.cpp	Wed Dec 03 14:53:20 2008 +0000
@@ -0,0 +1,87 @@
+#include "audioDB.h"
+extern "C" {
+#include "audioDB_API.h"
+#include "audioDB-internals.h"
+}
+
+/* Compute the sum of squared components of each vector already stored
+ * in the database and write the results, one double per vector, to the
+ * file at header->l2normTableOffset.  Returns 0 on success and 1 on
+ * failure. */
+static int audiodb_l2norm_existing(adb_t *adb) {
+  /* Start out null so the error path only releases what was actually
+   * acquired: the first goto can fire before either is assigned. */
+  double *data_buffer = NULL, *l2norm_buffer = NULL;
+  double *dp, *lp;
+  adb_header_t *header = adb->header;
+  size_t data_buffer_size = ALIGN_PAGE_UP(header->length);
+  size_t nvectors;
+
+  /* A zero dimension would turn the vector count into a division by
+   * zero. */
+  if(header->dim == 0) {
+    return 1;
+  }
+  nvectors = header->length / (sizeof(double) * header->dim);
+
+  /* FIXME: this map of the vector data will lose if we ever turn the
+   * l2norm flag on when we have already inserted a large number of
+   * vectors, as the mmap() will fail.  "Don't do that, then" is one
+   * possible answer. */
+  mmap_or_goto_error(double *, data_buffer, header->dataOffset, data_buffer_size);
+  l2norm_buffer = (double *) malloc(nvectors * sizeof(double));
+  if(!l2norm_buffer) {
+    goto error;
+  }
+
+  dp = data_buffer;
+  lp = l2norm_buffer;
+  for(size_t i = 0; i < nvectors; i++) {
+    *lp = 0;
+    for(unsigned int k = 0; k < header->dim; k++) {
+      *lp += (*dp)*(*dp);
+      dp++;
+    }
+    lp++;
+  }
+
+  if(lseek(adb->fd, header->l2normTableOffset, SEEK_SET) == (off_t) -1) {
+    goto error;
+  }
+  if(write(adb->fd, l2norm_buffer, nvectors * sizeof(double)) != (ssize_t) (nvectors * sizeof(double))) {
+    goto error;
+  }
+
+  munmap(data_buffer, data_buffer_size);
+  free(l2norm_buffer);
+
+  return 0;
+
+ error:
+  maybe_munmap(data_buffer, data_buffer_size);
+  free(l2norm_buffer);
+  return 1;
+}
+
+/* Turn on the L2-norm flag for the database and write the updated
+ * header out.  Unless the database has O2_FLAG_LARGE_ADB set or holds
+ * no data, the norm table for the existing vectors is filled in first.
+ * Returns 0 on success and 1 on failure. */
+int audiodb_l2norm(adb_t *adb) {
+  adb_header_t *header = adb->header;
+  if(header->flags & O2_FLAG_L2NORM) {
+    /* non-error code for forthcoming backwards-compatibility
+     * reasons */
+    return 0;
+  }
+  if((!(header->flags & O2_FLAG_LARGE_ADB)) && (header->length > 0)) {
+    if(audiodb_l2norm_existing(adb)) {
+      goto error;
+    }
+  }
+  adb->header->flags |= O2_FLAG_L2NORM;
+  return audiodb_sync_header(adb);
+
+ error:
+  return 1;
+}
--- a/power.cpp	Thu Nov 27 16:22:52 2008 +0000
+++ b/power.cpp	Wed Dec 03 14:53:20 2008 +0000
@@ -1,31 +1,7 @@
 #include "audioDB.h"
 extern "C" {
 #include "audioDB_API.h"
-}
-
-/* FIXME: we should not export this symbol to users of the library. */
-int audiodb_sync_header(adb_t *adb) {
-  off_t pos;
-  pos = lseek(adb->fd, (off_t) 0, SEEK_CUR);
-  if(pos == (off_t) -1) {
-    goto error;
-  }
-  if(lseek(adb->fd, (off_t) 0, SEEK_SET) == (off_t) -1) {
-    goto error;
-  }
-  if(write(adb->fd, adb->header, O2_HEADERSIZE) != O2_HEADERSIZE) {
-    goto error;
-  }
-
-  /* can be fsync() if fdatasync() is racily exciting and new */
-  fdatasync(adb->fd);
-  if(lseek(adb->fd, pos, SEEK_SET) == (off_t) -1) {
-    goto error;
-  }
-  return 0;
-
- error:
-  return 1;
+#include "audioDB-internals.h"
 }
 
 int audiodb_power(adb_t *adb) {