changeset 465:1030664df98c api-inversion

No more audioDB::index_allocate and audioDB::index_init_query. No more SERVER_LSH_INDEX_SINGLETON, either; instead each adb_t contains a single cache of the last used in-core index. At the moment, this cache is unused by the server (and the previous cache code has been replaced by a comment), but I think that this way everyone can be allowed to benefit without anyone having to explicitly manage indexes themselves. I'm not going to say how long I wandered in a maze of valgrind before giving up and keeping the hacky workaround for loading the lsh tables [see the FIXME comment in audiodb_index_init_query()]; let's just say that it was long enough to find the extra bonus crashy close(lshfid) in audioDB::index_index_db. Also, delete the abstraction-inverting LSH stuff from query.cpp where we are making our reporters; the fix for that, which is presumably when creating small indexes for large datasets, is to implement space-efficient reporters. (The accumulator code, which is my second attempt, is more space-efficient than the reporters; inspiration may wish to be drawn...)
author mas01cr
date Tue, 30 Dec 2008 23:56:57 +0000
parents 35bb388d0eac
children 11fccb6a3bd5
files audioDB-internals.h audioDB.cpp audioDB.h close.cpp index.cpp open.cpp query.cpp soap.cpp
diffstat 8 files changed, 53 insertions(+), 91 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB-internals.h	Tue Dec 30 15:38:59 2008 +0000
+++ b/audioDB-internals.h	Tue Dec 30 23:56:57 2008 +0000
@@ -52,6 +52,7 @@
   std::map<std::string,uint32_t> *keymap;
   std::vector<uint32_t> *track_lengths;
   std::vector<off_t> *track_offsets;
+  LSH *cached_lsh;
 };
 
 typedef struct {
--- a/audioDB.cpp	Tue Dec 30 15:38:59 2008 +0000
+++ b/audioDB.cpp	Tue Dec 30 23:56:57 2008 +0000
@@ -4,7 +4,6 @@
 #include "audioDB-internals.h"
 }
 
-LSH* SERVER_LSH_INDEX_SINGLETON;
 char* SERVER_ADB_ROOT;
 char* SERVER_ADB_FEATURE_ROOT;
 
@@ -225,7 +224,7 @@
     audiodb_close(adb);
     adb = NULL;
   }
-  if(lsh!=SERVER_LSH_INDEX_SINGLETON)
+  if(lsh)
     delete lsh;
 }
 
@@ -826,7 +825,6 @@
 // This entry point is visited once per instance
 // so it is a good place to set any global state variables
 int main(const int argc, const char* argv[]){
-  SERVER_LSH_INDEX_SINGLETON = 0; // Initialize global variables
   SERVER_ADB_ROOT = 0;            // Server-side database root prefix
   SERVER_ADB_FEATURE_ROOT = 0;    // Server-side features root prefix
   audioDB(argc, argv);
--- a/audioDB.h	Tue Dec 30 15:38:59 2008 +0000
+++ b/audioDB.h	Tue Dec 30 23:56:57 2008 +0000
@@ -47,6 +47,7 @@
   Accumulator *accumulator;
   std::set<std::string> *allowed_keys;
   std::priority_queue<PointPair> *exact_evaluation_queue;
+  LSH *lsh;
 } adb_qstate_internal_t;
 
 #define MAXSTR 512
@@ -198,7 +199,6 @@
 #define SAFE_DELETE(PTR) delete PTR; PTR=0;
 #define SAFE_DELETE_ARRAY(PTR) delete[] PTR; PTR=0;
 
-extern LSH* SERVER_LSH_INDEX_SINGLETON;
 extern char* SERVER_ADB_ROOT;
 extern char* SERVER_ADB_FEATURE_ROOT;
 
@@ -365,9 +365,6 @@
   int index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp);
   Uns32T index_insert_shingles(vector<vector<float> >*, Uns32T trackID, double* spp);
   int index_query_loop(adb_t *adb, adb_query_spec_t *spec, adb_qstate_internal_t *qstate);
-  int index_init_query(const char* dbName);
-  int index_exists(const char* dbName, double radius, Uns32T sequenceLength);
-  LSH* index_allocate(char* indexName, bool load_hashTables);
   void insertPowerData(unsigned n, int powerfd, double *powerdata);
   void init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);
   
--- a/close.cpp	Tue Dec 30 15:38:59 2008 +0000
+++ b/close.cpp	Tue Dec 30 23:56:57 2008 +0000
@@ -11,6 +11,9 @@
   delete adb->keymap;
   delete adb->track_lengths;
   delete adb->track_offsets;
+  if(adb->cached_lsh) {
+    delete adb->cached_lsh;
+  }
   close(adb->fd);
   free(adb);
 }
--- a/index.cpp	Tue Dec 30 15:38:59 2008 +0000
+++ b/index.cpp	Tue Dec 30 23:56:57 2008 +0000
@@ -62,21 +62,23 @@
   }
 }
 
-// If we are a server and have a memory-resident index, check the indexName against the resident index (using get_indexName())
-// If they match, i.e. path+dbName_resident == path+dbName_requested, use
-// the memory-resident index.
-// Else allocate a new LSH instance and load the index from disk
-LSH* audioDB::index_allocate(char* indexName, bool load_hashTables){
-  LSH* gIndx=SERVER_LSH_INDEX_SINGLETON;
-  if(isServer && gIndx && (strncmp(gIndx->get_indexName(), indexName, MAXSTR)==0) )
-    audioDB::lsh = gIndx; // Use the global SERVER resident index
-  else{
-    if(audioDB::lsh)
-      delete audioDB::lsh;
-    audioDB::lsh = new LSH(indexName, load_hashTables);
+/* FIXME: the indexName arg should be "const char *", but the LSH
+ * library doesn't like that.
+ */
+LSH *audiodb_index_allocate(adb_t *adb, char *indexName, bool load_tables) {
+  LSH *lsh;
+  if(adb->cached_lsh) {
+    if(!strncmp(adb->cached_lsh->get_indexName(), indexName, MAXSTR)) {
+      return adb->cached_lsh;
+    } else {
+      delete adb->cached_lsh;
+    }
   }
-  assert(audioDB::lsh);  
-  return audioDB::lsh;
+  lsh = new LSH(indexName, load_tables);
+  if(load_tables) {
+    adb->cached_lsh = lsh;
+  } 
+  return lsh;
 }
 
 vector<vector<float> > *audiodb_index_initialize_shingles(Uns32T sz, Uns32T dim, Uns32T seqLen) {
@@ -268,6 +270,7 @@
     // Clean up
     delete lsh;
     lsh = 0;
+  } else {
     close(lshfid);
   }
   
@@ -513,48 +516,27 @@
 
 
 // return true if indexed query performed else return false
-int audioDB::index_init_query(const char* dbName){
+int audiodb_index_init_query(adb_t *adb, adb_query_spec_t *spec, adb_qstate_internal_t *qstate, bool corep) {
 
-  if(!(audiodb_index_exists(dbName, radius, sequenceLength)))
+  uint32_t sequence_length = spec->qid.sequence_length;
+  double radius = spec->refine.radius;
+  if(!(audiodb_index_exists(adb->path, radius, sequence_length)))
     return false;
 
-  char *indexName = audiodb_index_get_name(dbName, radius, sequenceLength);
+  char *indexName = audiodb_index_get_name(adb->path, radius, sequence_length);
   if(!indexName) {
-    error("failed to get index name", dbName);
+    return false;
   }
 
-  // Test to see if file exists
-  if((lshfid = open (indexName, O_RDONLY)) < 0){
-    delete[] indexName;
-    return false;  
+  qstate->lsh = audiodb_index_allocate(adb, indexName, corep);
+
+  /* FIXME: it would be nice if the LSH library didn't make me do
+   * this. */
+  if((!corep) && (qstate->lsh->get_lshHeader()->flags & O2_SERIAL_FILEFORMAT2)) {
+    delete qstate->lsh;
+    qstate->lsh = audiodb_index_allocate(adb, indexName, true);
   }
 
-  lsh = index_allocate(indexName, false); // Get the header only here
-  sequenceLength = lsh->get_lshHeader()->dataDim / dbH->dim; // shingleDim / vectorDim
-  
-  if(lsh!=SERVER_LSH_INDEX_SINGLETON){  
-    if( fabs(radius - lsh->get_radius())>fabs(O2_DISTANCE_TOLERANCE))
-      printf("*** Warning: adb_radius (%f) != lsh_radius (%f) ***\n", radius, lsh->get_radius());
-    VERB_LOG(1,"INDEX: dim %d\n", (int)dbH->dim);
-    VERB_LOG(1,"INDEX: R %f\n", lsh->get_radius());
-    VERB_LOG(1,"INDEX: seqlen %d\n", sequenceLength);
-    VERB_LOG(1,"INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth());
-    VERB_LOG(1,"INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns());
-    VERB_LOG(1,"INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables());
-    VERB_LOG(1,"INDEX: N %d\n", lsh->get_lshHeader()->get_numRows());
-    VERB_LOG(1,"INDEX: s %d\n", audiodb_index_to_track_id(lsh->get_maxp(), audiodb_lsh_n_point_bits(adb)));
-    VERB_LOG(1,"INDEX: Opened LSH index file %s\n", indexName);
-  }
-
-  // Check to see if we are loading hash tables into core, and do so if true
-  if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core){
-    if(SERVER_LSH_INDEX_SINGLETON)
-      fprintf(stderr,"INDEX: using persistent hash tables: %s\n", lsh->get_indexName());
-    else
-      VERB_LOG(1,"INDEX: loading hash tables into core %s\n", (lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2)?"FORMAT2":"FORMAT1");
-    lsh = index_allocate(indexName, true);
-  }
-  
   delete[] indexName;
   return true;
 }
@@ -617,8 +599,9 @@
     add_point_func = &audiodb_index_add_point_approximate;  
   }
 
-  if(!index_init_query(adb->path)) // sets-up LSH index structures for querying
+  if(!audiodb_index_init_query(adb, spec, qstate, lsh_in_core)) {
     return 0;
+  }
 
   char *database = audiodb_index_get_name(adb->path, radius, sequenceLength);
   if(!database) {
@@ -649,18 +632,18 @@
   // numVecsAboveThreshold is number of points with power >= absolute_threshold
   double* qpp = qpointers.power; // Keep original qpPtr for possible exact evaluation
   if(usingQueryPoint && numVecsAboveThreshold){
-    if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core)
-      lsh->retrieve_point((*vv)[0], queryPoint, add_point_func, &callback_data);
+    if((qstate->lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core)
+      qstate->lsh->retrieve_point((*vv)[0], queryPoint, add_point_func, &callback_data);
     else
-      lsh->serial_retrieve_point(database, (*vv)[0], queryPoint, add_point_func, &callback_data);
+      qstate->lsh->serial_retrieve_point(database, (*vv)[0], queryPoint, add_point_func, &callback_data);
   }
   else if(numVecsAboveThreshold)
     for( Uns32T pointID = 0 ; pointID < Nq; pointID++ )
       if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold))) {
-	if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) {
-	  lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data);
+	if((qstate->lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) {
+	  qstate->lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, &callback_data);
         } else {
-	  lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data);   
+	  qstate->lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data);   
         }
       }
   audiodb_index_delete_shingles(vv);
@@ -669,9 +652,6 @@
     // Perform exact distance computation on point pairs in exact_evaluation_queue
     audiodb_query_queue_loop(adb, spec, qstate, query, &qpointers);
   
-  // Close the index file
-  close(lshfid);
-    
  // Clean up
   if(query_data)
     delete[] query_data;
@@ -683,6 +663,8 @@
     delete[] qpointers.mean_duration;
   if(database)
     delete[] database;
+  if(qstate->lsh != adb->cached_lsh)
+    delete qstate->lsh;
 
   return Nq;
 }
--- a/open.cpp	Tue Dec 30 15:38:59 2008 +0000
+++ b/open.cpp	Tue Dec 30 23:56:57 2008 +0000
@@ -128,6 +128,7 @@
   if(audiodb_collect_track_lengths(adb)) {
     goto error;
   }
+  adb->cached_lsh = 0;
   return adb;
 
  error:
--- a/query.cpp	Tue Dec 30 15:38:59 2008 +0000
+++ b/query.cpp	Tue Dec 30 23:56:57 2008 +0000
@@ -146,11 +146,6 @@
     case O2_SEQUENCE_QUERY:
       if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) {
         reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles);
-      } else if (audiodb_index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)) {
-	char *indexName = audiodb_index_get_name(adb->path, qspec.refine.radius, qspec.qid.sequence_length);
-	lsh = index_allocate(indexName, false);
-	reporter = new trackSequenceQueryRadReporter(trackNN, audiodb_index_to_track_id(lsh->get_maxp(), audiodb_lsh_n_point_bits(adb))+1);
-	delete[] indexName;
       } else {
 	reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles);
       }
@@ -158,11 +153,6 @@
     case O2_N_SEQUENCE_QUERY:
       if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) {
         reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles);
-      } else if (audiodb_index_exists(adb->path, qspec.refine.radius, qspec.qid.sequence_length)){
-	char *indexName = audiodb_index_get_name(adb->path, qspec.refine.radius, qspec.qid.sequence_length);
-	lsh = index_allocate(indexName, false);
-	reporter = new trackSequenceQueryRadNNReporter(pointNN, trackNN, audiodb_index_to_track_id(lsh->get_maxp(), audiodb_lsh_n_point_bits(adb))+1);
-	delete[] indexName;
       } else {
 	reporter = new trackSequenceQueryRadNNReporter(pointNN, trackNN, dbH->numFiles);
       }
--- a/soap.cpp	Tue Dec 30 15:38:59 2008 +0000
+++ b/soap.cpp	Tue Dec 30 23:56:57 2008 +0000
@@ -446,22 +446,12 @@
   else
     {
       fprintf(stderr, "Socket connection successful: master socket = %d\n", m);
-      // Make a global Web Services LSH Index (SINGLETON)
-      if(WS_load_index && dbName && !audiodb_index_exists(dbName, radius, sequenceLength)){
-        /* FIXME: this leaks the indexName */
-        error("Can't find requested index file:", audiodb_index_get_name(dbName,radius,sequenceLength));
-      }
-      if(WS_load_index && dbName && audiodb_index_exists(dbName, radius, sequenceLength)){
-	char *indexName = audiodb_index_get_name(dbName, radius, sequenceLength);
-	fprintf(stderr, "Loading LSH hashtables: %s...\n", indexName);
-	lsh = new LSH(indexName, true);
-	assert(lsh);
-	SERVER_LSH_INDEX_SINGLETON = lsh;
-	fprintf(stderr, "LSH INDEX READY\n");
-	fflush(stderr);
-	delete[] indexName;
-      }
-      
+      /* FIXME: we used to have a global cache of a single LSH index
+       * here.  CSR removed it because it interacted badly with
+       * APIification of querying, replacing it with a per-open-adb
+       * cache; we should try to take advantage of that instead.
+       */
+
       // Server-side path prefix to databases and features
       if(adb_root)
 	SERVER_ADB_ROOT = (char*)adb_root; // Server-side database root