changeset 459:fcc6f7c4856b api-inversion

No more global shingle vector of vectors. Convert audioDB::index_initialize_shingles and audioDB::index_norm_shingles to plain old functions. In doing so, the latter in particular acquires a silly argument list; we need that complexity for now because it's called both from audioDB::query (which we're currently inverting) and from audioDB::index (which is out of scope for now). The loss of the global vv member made me check up on memory discipline [hence the new API function audiodb_query_free_results() as well as the internal audiodb_index_delete_shingles()]. It's not too bad, but there are plenty of leaks for those with time to run AUDIODB="valgrind --leak-check=full ../../audioDB" sh ./run-test.sh on their favourite test case. For example, the Radius reporters leak one triple per hit. (Honestly, C++ memory management is the suck.)
author mas01cr
date Sun, 28 Dec 2008 22:43:50 +0000
parents 913a95f06998
children 17003dff8127
files audioDB.cpp audioDB.h audioDB_API.h index.cpp query.cpp
diffstat 5 files changed, 45 insertions(+), 29 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB.cpp	Sun Dec 28 18:44:08 2008 +0000
+++ b/audioDB.cpp	Sun Dec 28 22:43:50 2008 +0000
@@ -219,8 +219,6 @@
     delete reporter;
   if(rng)
     gsl_rng_free(rng);
-  if(vv)
-    delete vv;
   if(infid>0)
     close(infid);
   if(adb && !UseApiError) {
--- a/audioDB.h	Sun Dec 28 18:44:08 2008 +0000
+++ b/audioDB.h	Sun Dec 28 22:43:50 2008 +0000
@@ -376,19 +376,13 @@
   Uns32T lsh_param_b; // Batch size, in number of tracks, per indexing iteration
   Uns32T lsh_param_ncols; // Maximum number of collision in a hash-table row
 
-
-  // LSH vector<> containers for one in-core copy of a set of feature vectors
-  vector<vector<float> > *vv;  // one-track's worth data
-
   // LSH indexing and retrieval methods  
   void index_index_db(const char* dbName);
   void index_initialize(double**,double**,double**,double**,unsigned int*);
   void index_insert_tracks(Uns32T start_track, Uns32T end_track, double** fvpp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);
   int index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp);
   Uns32T index_insert_shingles(vector<vector<float> >*, Uns32T trackID, double* spp);
-  int index_norm_shingles(vector<vector<float> >*, double* snp, double* spp);
   int index_query_loop(adb_t *adb, adb_query_spec_t *spec, adb_qstate_internal_t *qstate);
-  vector<vector<float> >* index_initialize_shingles(Uns32T sz);
   int index_init_query(const char* dbName);
   int index_exists(const char* dbName, double radius, Uns32T sequenceLength);
   char* index_get_name(const char*dbName, double radius, Uns32T sequenceLength);
@@ -483,6 +477,5 @@
     lsh_param_m(0),				\
     lsh_param_N(0),				\
     lsh_param_b(0),				\
-    lsh_param_ncols(0),                         \
-    vv(0)
+    lsh_param_ncols(0)
 #endif
--- a/audioDB_API.h	Sun Dec 28 18:44:08 2008 +0000
+++ b/audioDB_API.h	Sun Dec 28 22:43:50 2008 +0000
@@ -188,6 +188,7 @@
 /* query function */
 int audiodb_query(adb_ptr mydb, adb_query_ptr adbq, adb_queryresult_ptr adbqres);
 int audiodb_query_spec(adb_t *, adb_query_spec_t *, adb_query_results_t *);
+int audiodb_query_free_results(adb_t *, adb_query_spec_t *, adb_query_results_t *);
 
 /* database status */  
 int audiodb_status(adb_ptr mydb, adb_status_ptr status);
--- a/index.cpp	Sun Dec 28 18:44:08 2008 +0000
+++ b/index.cpp	Sun Dec 28 22:43:50 2008 +0000
@@ -62,15 +62,18 @@
   return audioDB::lsh;
 }
 
-vector<vector<float> >* audioDB::index_initialize_shingles(Uns32T sz){
-  if(vv)
-    delete vv;
-  vv = new vector<vector<float> >(sz);
-  for(Uns32T i=0 ; i < sz ; i++)
-    (*vv)[i]=vector<float>(dbH->dim*sequenceLength);  // allocate shingle storage
+vector<vector<float> > *audiodb_index_initialize_shingles(Uns32T sz, Uns32T dim, Uns32T seqLen) {
+  std::vector<std::vector<float> > *vv = new vector<vector<float> >(sz);
+  for(Uns32T i=0 ; i < sz ; i++) {
+    (*vv)[i]=vector<float>(dim * seqLen);
+  }
   return vv;
 }
 
+void audiodb_index_delete_shingles(vector<vector<float> > *vv) {
+  delete vv;
+}
+
 /********************  LSH indexing audioDB database access forall s \in {S} ***********************/
 
 // Prepare the AudioDB database for read access and allocate auxillary memory
@@ -154,26 +157,28 @@
 // norm shingles
 // in-place norming, no deletions
 // If using power, return number of shingles above power threshold
-int audioDB::index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp){  
+int audiodb_index_norm_shingles(vector<vector<float> >* vv, double* snp, double* spp, Uns32T dim, Uns32T seqLen, double radius, bool normed_vectors, bool use_pthreshold, float pthreshold) {
   int z = 0; // number of above-threshold shingles
   float l2norm;
   double power;
   float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2
   float oneOverSqrtl2NormDivRad = oneOverRadius;
-  if(!spp)
-    error("LSH indexing and query requires a power feature using -w or -W");
-  Uns32T shingleSize = sequenceLength*dbH->dim;
+  Uns32T shingleSize = seqLen * dim;
+
+  if(!spp) {
+    return -1;
+  }
   for(Uns32T a=0; a<(*vv).size(); a++){
     l2norm = (float)(*snp++);
-    if(audioDB::normalizedDistance)
+    if(normed_vectors)
       oneOverSqrtl2NormDivRad = (1./l2norm)*oneOverRadius;
     
     for(Uns32T b=0; b < shingleSize ; b++)
       (*vv)[a][b]*=oneOverSqrtl2NormDivRad;
 
     power = *spp++;
-    if(use_absolute_threshold){
-      if ( power >= absolute_threshold )
+    if(use_pthreshold){
+      if (power >= pthreshold)
 	z++;
     }
     else
@@ -438,17 +443,23 @@
   
   Uns32T numVecsAboveThreshold = 0, collisionCount = 0; 
   if(numVecs){
-    vv = index_initialize_shingles(numVecs);
+    std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(numVecs, dbH->dim, sequenceLength);
     
     for( Uns32T pointID = 0 ; pointID < numVecs; pointID++ )
       audiodb_index_make_shingle(vv, pointID, *fvpp, dbH->dim, sequenceLength);
-    
-    numVecsAboveThreshold = index_norm_shingles(vv, *snpp, *sppp);
+    int vcount = audiodb_index_norm_shingles(vv, *snpp, *sppp, dbH->dim, sequenceLength, radius, normalizedDistance, use_absolute_threshold, absolute_threshold);
+    if(vcount == -1) {
+      audiodb_index_delete_shingles(vv);
+      error("failed to norm shingles");
+    }
+    numVecsAboveThreshold = vcount;
     collisionCount = index_insert_shingles(vv, trackID, *sppp);
+    audiodb_index_delete_shingles(vv);
   }
+
   float meanCollisionCount = numVecsAboveThreshold?(float)collisionCount/numVecsAboveThreshold:0;
 
-  /* index_norm_shingles() only goes as far as the end of the
+  /* audiodb_index_norm_shingles() only goes as far as the end of the
      sequence, which is right, but the space allocated is for the
      whole track.  */
 
@@ -594,14 +605,19 @@
 
   // query vector index
   Uns32T Nq = (qpointers.nvectors>O2_MAXTRACKLEN?O2_MAXTRACKLEN:qpointers.nvectors) - sequenceLength + 1;
-  vv = index_initialize_shingles(Nq); // allocate memory to copy query vectors to shingles
+  std::vector<std::vector<float> > *vv = audiodb_index_initialize_shingles(Nq, adb->header->dim, sequenceLength); // allocate memory to copy query vectors to shingles
 
   // Construct shingles from query features  
   for( Uns32T pointID = 0 ; pointID < Nq ; pointID++ ) 
     audiodb_index_make_shingle(vv, pointID, query, dbH->dim, sequenceLength);
   
   // Normalize query vectors
-  Uns32T numVecsAboveThreshold = index_norm_shingles( vv, qpointers.l2norm, qpointers.power );
+  int vcount = audiodb_index_norm_shingles(vv, qpointers.l2norm, qpointers.power, dbH->dim, sequenceLength, radius, normalizedDistance, use_absolute_threshold, absolute_threshold);
+  if(vcount == -1) {
+    audiodb_index_delete_shingles(vv);
+    error("failed to norm shingles");
+  }
+  Uns32T numVecsAboveThreshold = vcount;
 
   // Nq contains number of inspected points in query file, 
   // numVecsAboveThreshold is number of points with power >= absolute_threshold
@@ -621,6 +637,7 @@
 	  lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, &callback_data);   
         }
       }
+  audiodb_index_delete_shingles(vv);
 
   if(lsh_exact)
     // Perform exact distance computation on point pairs in exact_evaluation_queue
--- a/query.cpp	Sun Dec 28 18:44:08 2008 +0000
+++ b/query.cpp	Sun Dec 28 22:43:50 2008 +0000
@@ -258,10 +258,17 @@
     adb_result_t r = rs->results[k];
     reporter->add_point(audiodb_key_index(adb, r.key), r.qpos, r.ipos, r.dist);
   }
+  audiodb_query_free_results(adb, &qspec, rs);
 
   reporter->report(fileTable, adbQueryResponse);
 }
 
+int audiodb_query_free_results(adb_t *adb, adb_query_spec_t *spec, adb_query_results_t *rs) {
+  free(rs->results);
+  free(rs);
+  return 0;
+}
+
 static void audiodb_initialize_arrays(adb_t *adb, adb_query_spec_t *spec, int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD) {
   unsigned int j, k, l, w;
   double *dp, *qp, *sp;