changeset 431:8632cd387e24 api-inversion

Punishment gluttony. Continue teasing out vague orthogonalities by beginning the task of using an adb_query_parameters_t. This does have the benefit of making the distance calculation clearer, and we begin to see the shape of a putative audiodb_query() emerging from the shrapnel of audioDB::query. (Only the general shape; the detail is still a long, long way away.)
author mas01cr
date Wed, 24 Dec 2008 10:55:40 +0000
parents 2d14d21f826b
children 62a0515f59be
files audioDB.h index.cpp query.cpp
diffstat 3 files changed, 127 insertions(+), 92 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB.h	Wed Dec 24 10:55:36 2008 +0000
+++ b/audioDB.h	Wed Dec 24 10:55:40 2008 +0000
@@ -326,8 +326,8 @@
   void set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned int *nvp);
   void set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex);
   void set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp);
-  void query_loop(adb_query_refine_t *refine, Uns32T queryIndex);
-  void query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors, adb_query_refine_t *refine);
+  void query_loop(adb_query_parameters_t *params, adb_query_refine_t *refine, Uns32T queryIndex);
+  void query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors, adb_query_parameters_t *params, adb_query_refine_t *refine);
   void initRNG();
   void initDBHeader(const char *dbName);
   void initInputFile(const char *inFile, bool loadData = true);
@@ -388,7 +388,7 @@
   Uns32T index_insert_shingles(vector<vector<float> >*, Uns32T trackID, double* spp);
   void index_make_shingle(vector<vector<float> >*, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen);
   int index_norm_shingles(vector<vector<float> >*, double* snp, double* spp);
-  int index_query_loop(adb_query_refine_t *refine, const char* dbName, Uns32T queryIndex);
+  int index_query_loop(adb_query_parameters_t *params, adb_query_refine_t *refine, const char* dbName, Uns32T queryIndex);
   vector<vector<float> >* index_initialize_shingles(Uns32T sz);
   int index_init_query(const char* dbName);
   int index_exists(const char* dbName, double radius, Uns32T sequenceLength);
--- a/index.cpp	Wed Dec 24 10:55:36 2008 +0000
+++ b/index.cpp	Wed Dec 24 10:55:40 2008 +0000
@@ -575,7 +575,7 @@
 
 // return 0: if index does not exist
 // return nqv: if index exists
-int audioDB::index_query_loop(adb_query_refine_t *refine, const char* dbName, Uns32T queryIndex) {
+int audioDB::index_query_loop(adb_query_parameters_t *params, adb_query_refine_t *refine, const char* dbName, Uns32T queryIndex) {
   
   unsigned int numVectors = 0;
   double *query = 0, *query_data = 0;
@@ -583,6 +583,8 @@
   double meanQdur = 0;
   void (*add_point_func)(void*,Uns32T,Uns32T,float);
 
+  normalizedDistance = (params->distance == ADB_DISTANCE_EUCLIDEAN_NORMED);
+
   // Set the point-reporter callback based on the value of lsh_exact
   if(lsh_exact){
     initialize_exact_evalutation_queue();
@@ -603,9 +605,6 @@
 
   VERB_LOG(1, "retrieving tracks...");
   
-  assert(pointNN>0 && pointNN<=O2_MAXNN);
-  assert(trackNN>0 && trackNN<=O2_MAXNN);
-
   gettimeofday(&tv1, NULL);   
   // query vector index
   Uns32T Nq = (numVectors>O2_MAXTRACKLEN?O2_MAXTRACKLEN:numVectors) - sequenceLength + 1;
@@ -640,7 +639,7 @@
 
   if(lsh_exact)
     // Perform exact distance computation on point pairs in exact_evaluation_queue
-    query_loop_points(query, qnPtr, qpPtr, meanQdur, numVectors, refine); 
+    query_loop_points(query, qnPtr, qpPtr, meanQdur, numVectors, params, refine); 
   
   gettimeofday(&tv2,NULL);
   VERB_LOG(1,"elapsed time: %ld msec\n",
--- a/query.cpp	Wed Dec 24 10:55:36 2008 +0000
+++ b/query.cpp	Wed Dec 24 10:55:40 2008 +0000
@@ -20,7 +20,14 @@
 
 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
 
+  // init database tables and dbH first
+  if(query_from_key)
+    initTables(dbName);
+  else
+    initTables(dbName, inFile);
+
   adb_query_refine_t refine;
+  adb_query_parameters_t params;
   refine.flags = 0;
   /* FIXME: trackFile / ADB_REFINE_KEYLIST */
   if(radius) {
@@ -45,83 +52,114 @@
     refine.hopsize = sequenceHop;
   }
 
-  // init database tables and dbH first
-  if(query_from_key)
-    initTables(dbName);
-  else
-    initTables(dbName, inFile);
+  switch(queryType) {
+  case O2_POINT_QUERY:
+    sequenceLength = 1;
+    params.accumulation = ADB_ACCUMULATION_DB;
+    params.distance = ADB_DISTANCE_DOT_PRODUCT;
+    params.npoints = pointNN;
+    params.ntracks = 0;
+    reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN);
+    break;
+  case O2_TRACK_QUERY:
+    sequenceLength = 1;
+    params.accumulation = ADB_ACCUMULATION_PER_TRACK;
+    params.distance = ADB_DISTANCE_DOT_PRODUCT;
+    params.npoints = pointNN;
+    params.ntracks = trackNN;
+    reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles);
+    break;
+  case O2_SEQUENCE_QUERY:
+  case O2_N_SEQUENCE_QUERY:
+    params.accumulation = ADB_ACCUMULATION_PER_TRACK;
+    params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED;
+    params.npoints = pointNN;
+    params.ntracks = trackNN;
+    switch(queryType) {
+    case O2_SEQUENCE_QUERY:
+      if(!(refine.flags & ADB_REFINE_RADIUS)) {
+        reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles);
+      } else if (index_exists(dbName, radius, sequenceLength)) {
+	char* indexName = index_get_name(dbName, radius, sequenceLength);
+	lsh = index_allocate(indexName, false);
+	reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
+	delete[] indexName;
+      } else {
+	reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles);
+      }
+      break;
+    case O2_N_SEQUENCE_QUERY:
+      if(!(refine.flags & ADB_REFINE_RADIUS)) {
+        reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles);
+      } else if (index_exists(dbName, radius, sequenceLength)){
+	char* indexName = index_get_name(dbName, radius, sequenceLength);
+	lsh = index_allocate(indexName, false);
+	reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
+	delete[] indexName;
+      } else {
+	reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles);
+      }
+      break;
+    }
+    break;
+  case O2_ONE_TO_ONE_N_SEQUENCE_QUERY:
+    params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE;
+    params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED;
+    params.npoints = 0;
+    params.ntracks = 0;
+    break;
+  default:
+    error("unrecognized queryType");
+  }
 
   // keyKeyPos requires dbH to be initialized
   if(query_from_key && (!key || (query_from_key_index = audiodb_key_index(adb, key)) == (uint32_t) -1)) 
     error("Query key not found", key);  
-  
-  switch (queryType) {
-  case O2_POINT_QUERY:
-    sequenceLength = 1;
-    normalizedDistance = false;
-    reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN);
-    accumulator = new DBAccumulator<adb_result_dist_gt>(pointNN);
-    break;
-  case O2_TRACK_QUERY:
-    sequenceLength = 1;
-    normalizedDistance = false;
-    reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles);
-    accumulator = new PerTrackAccumulator<adb_result_dist_gt>(pointNN, trackNN);
-    break;
-  case O2_SEQUENCE_QUERY:    
-    if(no_unit_norming)
-      normalizedDistance = false;
-    accumulator = new PerTrackAccumulator<adb_result_dist_lt>(pointNN, trackNN);
-    if(!(refine.flags & ADB_REFINE_RADIUS)) {
-      reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles);
-    } else {
-      if(index_exists(dbName, radius, sequenceLength)){
-	char* indexName = index_get_name(dbName, radius, sequenceLength);
-	lsh = index_allocate(indexName, false);
-	reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
-	delete[] indexName;
-      }
-      else
-	reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles);
+
+  switch(params.distance) {
+  case ADB_DISTANCE_DOT_PRODUCT:
+    switch(params.accumulation) {
+    case ADB_ACCUMULATION_DB:
+      accumulator = new DBAccumulator<adb_result_dist_gt>(params.npoints);
+      break;
+    case ADB_ACCUMULATION_PER_TRACK:
+      accumulator = new PerTrackAccumulator<adb_result_dist_gt>(params.npoints, params.ntracks);
+      break;
+    case ADB_ACCUMULATION_ONE_TO_ONE:
+      accumulator = new NearestAccumulator<adb_result_dist_gt>();
+      break;
+    default:
+      error("unknown accumulation");
     }
     break;
-  case O2_N_SEQUENCE_QUERY:
-    if(no_unit_norming)
-      normalizedDistance = false;
-    accumulator = new PerTrackAccumulator<adb_result_dist_lt>(pointNN, trackNN);
-    if(!(refine.flags & ADB_REFINE_RADIUS)) {
-      reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles);
-    } else {
-      if(index_exists(dbName, radius, sequenceLength)){
-	char* indexName = index_get_name(dbName, radius, sequenceLength);
-	lsh = index_allocate(indexName, false);
-	reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1);
-	delete[] indexName;
-      }
-      else
-	reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles);
-    }
-    break;
-  case O2_ONE_TO_ONE_N_SEQUENCE_QUERY :
-    accumulator = new NearestAccumulator<adb_result_dist_lt>();
-    if(!(refine.flags & ADB_REFINE_RADIUS)) {
-      error("query-type not yet supported");
-    } else {
-      reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, dbH->numFiles);
+  case ADB_DISTANCE_EUCLIDEAN_NORMED:
+  case ADB_DISTANCE_EUCLIDEAN:
+    switch(params.accumulation) {
+    case ADB_ACCUMULATION_DB:
+      accumulator = new DBAccumulator<adb_result_dist_lt>(params.npoints);
+      break;
+    case ADB_ACCUMULATION_PER_TRACK:
+      accumulator = new PerTrackAccumulator<adb_result_dist_lt>(params.npoints, params.ntracks);
+      break;
+    case ADB_ACCUMULATION_ONE_TO_ONE:
+      accumulator = new NearestAccumulator<adb_result_dist_lt>();
+      break;
+    default:
+      error("unknown accumulation");
     }
     break;
   default:
-    error("unrecognized queryType in query()");
-  }  
-
+    error("unknown distance function");
+  }
+  
   // Test for index (again) here
   if((refine.flags & ADB_REFINE_RADIUS) && index_exists(dbName, radius, sequenceLength)){ 
     VERB_LOG(1, "Calling indexed query on database %s, radius=%f, sequenceLength=%d\n", dbName, radius, sequenceLength);
-    index_query_loop(&refine, dbName, query_from_key_index);
+    index_query_loop(&params, &refine, dbName, query_from_key_index);
   }
   else{
     VERB_LOG(1, "Calling brute-force query on database %s\n", dbName);
-    query_loop(&refine, query_from_key_index);
+    query_loop(&params, &refine, query_from_key_index);
   }
 
   adb_query_results_t *rs = accumulator->get_points();
@@ -515,7 +553,7 @@
 // Postconditions:
 // reporter contains the points and distances that meet the reporter constraints 
 
-void audioDB::query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors, adb_query_refine_t *refine){ 
+void audioDB::query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors, adb_query_parameters_t *params, adb_query_refine_t *refine){ 
   unsigned int dbVectors;
   double *sNorm = 0, *snPtr, *sPower = 0, *spPtr = 0;
   double *meanDBdur = 0;
@@ -533,9 +571,6 @@
 
   VERB_LOG(1, "matching points...");
 
-  assert(pointNN>0 && pointNN<=O2_MAXNN);
-  assert(trackNN>0 && trackNN<=O2_MAXNN);
-
   // We are guaranteed that the order of points is sorted by:
   // trackID, spos, qpos
   // so we can be relatively efficient in initialization of track data.
@@ -597,13 +632,14 @@
       dist = audiodb_dot_product(query+qPos*dbH->dim, data_buffer+pp.spos*dbH->dim, dbH->dim*sequenceLength);
       double qn = qnPtr[qPos];
       double sn = sNorm[sPos];
-      if(normalizedDistance) 
+      switch(params->distance) {
+      case ADB_DISTANCE_EUCLIDEAN_NORMED:
 	dist = 2 - (2/(qn*sn))*dist;
-      else 
-	if(no_unit_norming)
-	  dist = qn*qn + sn*sn - 2*dist;
-      // else
-      // dist = dist;      
+        break;
+      case ADB_DISTANCE_EUCLIDEAN:
+        dist = qn*qn + sn*sn - 2*dist;
+        break;
+      }
       if((!radius) || dist <= (O2_LSH_EXACT_MULT*radius+O2_DISTANCE_TOLERANCE)) {
         adb_result_t r;
         r.key = fileTable + pp.trackID * O2_FILETABLE_ENTRY_SIZE;
@@ -621,7 +657,7 @@
   SAFE_DELETE_ARRAY(meanDBdur);
 }
 
-void audioDB::query_loop(adb_query_refine_t *refine, Uns32T queryIndex) {
+void audioDB::query_loop(adb_query_parameters_t *params, adb_query_refine_t *refine, Uns32T queryIndex) {
   
   unsigned int numVectors;
   double *query, *query_data;
@@ -644,9 +680,6 @@
 
   VERB_LOG(1, "matching tracks...");
   
-  assert(pointNN>0 && pointNN<=O2_MAXNN);
-  assert(trackNN>0 && trackNN<=O2_MAXNN);
-
   unsigned j,k,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
   double **D = 0;    // Differences query and target 
   double **DD = 0;   // Matched filter distance
@@ -716,15 +749,18 @@
 	// Search for minimum distance by shingles (concatenated vectors)
 	for(j = 0; j <= numVectors - wL; j += HOP_SIZE) {
 	  for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) {
-            double thisDist;
-            if(normalizedDistance) 
+            double thisDist = 0;
+            switch(params->distance) {
+            case ADB_DISTANCE_EUCLIDEAN_NORMED:
               thisDist = 2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
-	    else 
-	      if(no_unit_norming)
-		thisDist = qnPtr[j]*qnPtr[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k];
-	      else
-		thisDist = DD[j][k];
-
+              break;
+            case ADB_DISTANCE_EUCLIDEAN:
+              thisDist = qnPtr[j]*qnPtr[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k];
+              break;
+            case ADB_DISTANCE_DOT_PRODUCT:
+              thisDist = DD[j][k];
+              break;
+            }
 	    // Power test
 	    if ((!usingPower) || audiodb_powers_acceptable(refine, qpPtr[j], sPower[trackIndexOffset + k])) {
               // radius test