changeset 321:da2272e029b3 large_adb

Added --adb_feature_root=path filename prefix for locating LARGE_ADB features with relative paths at QUERY time. Also added convenience argument --adb_root=path prefix for -d database command option.
author mas01mc
date Thu, 21 Aug 2008 19:16:21 +0000
parents a995e5ad999a
children 634959ef98f2
files audioDB.cpp audioDB.h common.cpp gengetopt.in index.cpp insert.cpp query.cpp
diffstat 7 files changed, 87 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB.cpp	Wed Aug 20 13:50:58 2008 +0000
+++ b/audioDB.cpp	Thu Aug 21 19:16:21 2008 +0000
@@ -34,6 +34,10 @@
     error("No command found");
   }
 
+  // perform dbName path prefix substitution
+  if(adb_root)
+    prefix_name((char** const)&dbName, adb_root);
+
   if(O2_ACTION(COM_SERVER))
     startServer();
 
@@ -243,6 +247,14 @@
     relative_threshold = args_info.relative_threshold_arg;
   }
 
+  if (args_info.adb_root_given){
+    adb_root = args_info.adb_root_arg;
+  }
+
+  if (args_info.adb_feature_root_given){
+    adb_feature_root = args_info.adb_feature_root_arg;
+  }
+    
   if(args_info.SERVER_given){
     command=COM_SERVER;
     port=args_info.SERVER_arg;
--- a/audioDB.h	Wed Aug 20 13:50:58 2008 +0000
+++ b/audioDB.h	Thu Aug 21 19:16:21 2008 +0000
@@ -156,8 +156,8 @@
 
 // We will only use this in a 32-bit address space
 // So map the off_t down to 32-bits first
-#define INSERT_FILETABLE_STRING(OFFSET, STR) \
-    strncpy((char*)((Uns32T)OFFSET) + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, STR, strlen(STR));
+#define INSERT_FILETABLE_STRING(TABLE, STR) \
+    strncpy(TABLE + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, STR, strlen(STR));
 
 #define SAFE_DELETE(PTR) delete PTR; PTR=0;
 #define SAFE_DELETE_ARRAY(PTR) delete[] PTR; PTR=0;
@@ -208,8 +208,10 @@
   std::ifstream *timesFile;
   const char *powerFileName;
   std::ifstream *powerFile;
+  const char* adb_root;
+  const char* adb_feature_root;
+
   int powerfd;
-
   int dbfid;
   int lshfid;
   bool forWrite;
@@ -306,6 +308,8 @@
   void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata);
   void insertPowerData(unsigned n, int powerfd, double *powerdata);
   unsigned getKeyPos(char* key);
+  void prefix_name(char** const name, const char* prefix);
+
  public:
   audioDB(const unsigned argc, char* const argv[]);
   audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse);
@@ -394,7 +398,9 @@
     timesFile(0),				\
     powerFileName(0),				\
     powerFile(0),				\
-    powerfd(0),					\
+    adb_root(0),                                \
+    adb_feature_root(0),                        \
+    powerfd(0),                                 \
     dbfid(0),					\
     lshfid(0),					\
     forWrite(false),				\
--- a/common.cpp	Wed Aug 20 13:50:58 2008 +0000
+++ b/common.cpp	Thu Aug 21 19:16:21 2008 +0000
@@ -230,3 +230,21 @@
     initInputFile(inFile);
 }
 
+// If *name is a relative path, point *name at a new malloc()ed "prefix/name".
+// No-op for a null prefix or an absolute *name; the old buffer is never freed.
+void audioDB::prefix_name(char** const name, const char* prefix){
+  if(!prefix)
+    return;
+  assert(name && *name);
+  // Do not prefix absolute path+filename
+  if(**name=='/')
+    return;
+  // Need room for prefix + '/' + name + terminating NUL
+  if (strlen(*name) + strlen(prefix) + 2 > O2_MAXFILESTR)
+    error("error: path prefix + filename too long",prefix);
+  char* prefixedName = (char*) malloc(O2_MAXFILESTR);
+  if(!prefixedName)
+    error("error: failed to allocate prefixed filename",prefix);
+  snprintf(prefixedName, O2_MAXFILESTR, "%s/%s", prefix, *name);
+  *name = prefixedName; // side effect new name to old name
+}
--- a/gengetopt.in	Wed Aug 20 13:50:58 2008 +0000
+++ b/gengetopt.in	Thu Aug 21 19:16:21 2008 +0000
@@ -8,6 +8,7 @@
 section "Database Operations" sectiondesc="All database operations require a database argument."
 
 option "database" d "database file required by Database commands." string typestr="filename" optional
+option "adb_root" - "path prefix for database" string typestr="path" dependon="database" optional
 
 section "Database Creation" sectiondesc="Creating a new database file."
 
@@ -23,7 +24,7 @@
 option "output" - "output directory" string dependon="DUMP" default="audioDB.dump" optional
 option "L2NORM" L "unit norm vectors and norm all future inserts." dependon="database" optional
 option "POWER"  P "turn on power flag for database." dependon="database" optional
-option "INDEX"  X "build an index for -d database at -R radius" dependon="database" dependon="radius" optional
+
 section "Database Information" sectiondesc="Information about databases."
 
 option "STATUS" S "output database information to stdout." dependon="database" optional
@@ -33,7 +34,7 @@
 section "Database Insertion" sectiondesc="The following commands insert feature files, with optional keys and timestamps.\n"
 
 option "INSERT"      I "add feature vectors to an existing database." dependon="features" optional
-option "UPDATE"      U "replace inserted vectors associated with key with new input vectors." dependon="features" dependon="key" dependon="database" optional hidden
+option "adb_feature_root" - "path prefix for feature files, times files and power files" string typestr="path" optional
 option "features" f "binary series of vectors file {int sz:ieee double[][sz]:eof}." string typestr="filename" dependon="database" optional
 option "times"    t "list of time points (ascii) for feature vectors." string typestr="filename" dependon="features" optional
 option "power"    w "binary power feature file." string typestr="filename" dependon="database" optional
@@ -62,6 +63,7 @@
 
 section "Locality-sensitive hashing (LSH) parameters" sectiondesc="These parameters control LSH indexing and retrieval\n"
 
+option "INDEX"  X "build an index for -d database at -R radius and -l sequenceLength" dependon="database" dependon="radius" optional
 option "lsh_w" - "width of LSH hash-function bins. " double default="4.0" dependon="INDEX" optional hidden
 option "lsh_k" - "even number of independent hash functions to employ with LSH" int typestr="size" default="8" dependon="INDEX" optional
 option "lsh_m" - "number of hash tables is m(m-1)/2" int typestr="size" default="5" dependon="INDEX" optional
@@ -79,9 +81,10 @@
 section "Web Services" sectiondesc="These commands enable the database process to establish a connection via the internet and operate as separate client and server processes.\n"
 
 option "SERVER" s "run as standalone web service on named port." int typestr="port" default="14475" optional
+option "load_index" - "make web service with memory-resident hashtables" flag off dependon="radius" optional
 option "client" c "run as a client using named host service." string typestr="hostname:port" optional
-option "load_index" - "make web service with memory-resident hashtables" flag off dependon="radius" optional
+
 
 text "
-Copyright (c) 2007 Michael Casey, Christophe Rhodes
+Copyright (c) 2007-2008 Michael Casey, Christophe Rhodes
                   Goldsmiths, University of London"
--- a/index.cpp	Wed Aug 20 13:50:58 2008 +0000
+++ b/index.cpp	Thu Aug 21 19:16:21 2008 +0000
@@ -57,6 +57,10 @@
     return true;
 }
 
+// If we are a server and have a memory-resident index, check the indexName against the resident index (using get_indexName())
+// If they match, i.e. path+dbName_resident == path+dbName_requested, use
+// the memory-resident index.
+// Else allocate a new LSH instance and load the index from disk
 LSH* audioDB::index_allocate(char* indexName, bool load_hashTables){
   LSH* gIndx=SERVER_LSH_INDEX_SINGLETON;
   if(isServer && gIndx && (strncmp(gIndx->get_indexName(), indexName, MAXSTR)==0) )
@@ -245,18 +249,24 @@
 
   // Allocate and read the power sequence
   if(trackTable[trackID]>=sequenceLength){
-
+    
+    char* prefixedString = new char[O2_MAXFILESTR];
+    char* tmpStr = prefixedString;
     // Open and check dimensions of power file
-    powerfd = open(powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, O_RDONLY);
+    strncpy(prefixedString, powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
+    prefix_name((char ** const)&prefixedString, adb_feature_root);
+    if(prefixedString!=tmpStr)
+      delete[] tmpStr;
+    powerfd = open(prefixedString, O_RDONLY);
     if (powerfd < 0) {
-      error("failed to open power file", powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE);
+      error("failed to open power file", prefixedString);
     }
     if (fstat(powerfd, &statbuf) < 0) {
-      error("fstat error finding size of power file", powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, "fstat");
+      error("fstat error finding size of power file", prefixedString, "fstat");
     }
     
     if( (statbuf.st_size - sizeof(int)) / (sizeof(double)) != trackTable[trackID] )
-      error("Dimension mismatch: numPowers != numVectors", powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE);
+      error("Dimension mismatch: numPowers != numVectors", prefixedString);
    
     *sPowerp = new double[trackTable[trackID]]; // Allocate memory for power values
     assert(*sPowerp);
@@ -292,8 +302,14 @@
   int trackfd = dbfid;
   for(trackID = start_track ; trackID < end_track ; trackID++ ){
     if( dbH->flags & O2_FLAG_LARGE_ADB ){
+      char* prefixedString = new char[O2_MAXFILESTR];
+      char* tmpStr = prefixedString;
       // Open and check dimensions of feature file
-      initInputFile(featureFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, false); // nommap, file pointer at correct position
+      strncpy(prefixedString, featureFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
+      prefix_name((char ** const) &prefixedString, adb_feature_root);
+      if(prefixedString!=tmpStr)
+	delete[] tmpStr;
+      initInputFile(prefixedString, false); // nommap, file pointer at correct position
       trackfd = infid;
     }
     read_data(trackfd, trackID, &fvp, &nfv); // over-writes fvp and nfv
--- a/insert.cpp	Wed Aug 20 13:50:58 2008 +0000
+++ b/insert.cpp	Thu Aug 21 19:16:21 2008 +0000
@@ -492,8 +492,10 @@
       }
     }
     // CLEAN UP
-    munmap(indata,statbuf.st_size);
-    close(infid);
+    if(indata)
+      munmap(indata,statbuf.st_size);
+    if(infid>0)
+      close(infid);
   } while(!filesIn->eof());
 
   VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
--- a/query.cpp	Wed Aug 20 13:50:58 2008 +0000
+++ b/query.cpp	Thu Aug 21 19:16:21 2008 +0000
@@ -345,7 +345,13 @@
   if( dbH->flags & O2_FLAG_LARGE_ADB ){
     if(infid>0)
       close(infid);
-    initInputFile(featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, false); // nommap, file pointer at correct position
+    char* prefixedString = new char[O2_MAXFILESTR];
+    char* tmpStr = prefixedString;
+    strncpy(prefixedString, featureFileNameTable+queryIndex*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
+    prefix_name(&prefixedString, adb_feature_root);
+    if(tmpStr!=prefixedString)
+      delete[] tmpStr;
+    initInputFile(prefixedString, false); // nommap, file pointer at correct position
     size_t allocatedSize = 0;
     read_data(infid, queryIndex, qp, &allocatedSize); // over-writes qp and allocatedSize
     // Consistency check on allocated memory and query feature size
@@ -531,6 +537,8 @@
       trackOffset=0;
       trackIndexOffset=0;
       if(currentTrack!=pp.trackID){
+	char* prefixedString = new char[O2_MAXFILESTR];
+	char* tmpStr = prefixedString;
 	// On currentTrack change, allocate and load track data
 	currentTrack=pp.trackID;
 	SAFE_DELETE_ARRAY(sNorm);
@@ -538,7 +546,11 @@
 	if(infid>0)
 	  close(infid);
 	// Open and check dimensions of feature file
-	initInputFile(featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, false); // nommap, file pointer at correct position
+	strncpy(prefixedString, featureFileNameTable+pp.trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
+	prefix_name((char ** const) &prefixedString, adb_feature_root);
+	if (prefixedString!=tmpStr)
+	  delete[] tmpStr;
+	initInputFile(prefixedString, false); // nommap, file pointer at correct position
 	// Load the feature vector data for current track into data_buffer
 	read_data(infid, pp.trackID, &data_buffer, &data_buffer_size);	
 	// Load power and calculate power and l2norm sequence sums