changeset 232:c76cdcf096fd refactoring

Proof of Reporter concept: restore pointQuery and trackPointQuery. Only now it's "obvious" how they are the same and how they differ from the basic loop: * normalizedDistance is the only new feature added; * all the rest is in reporter stuff... * ... except that we have to turn on L2Norm, even if we don't use it. Alter the tests to cope with this fact, L2-norming the point- and track-search databases.
author mas01cr
date Mon, 10 Dec 2007 20:01:43 +0000
parents a5d2fa43accf
children 542c22f4fadc
files audioDB.h query.cpp tests/0003/run-test.sh tests/0004/run-test.sh tests/0009/run-test.sh tests/0014/run-test.sh tests/0019/run-test.sh tests/0032/run-test.sh
diffstat 8 files changed, 226 insertions(+), 22 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB.h	Mon Dec 10 18:25:55 2007 +0000
+++ b/audioDB.h	Mon Dec 10 20:01:43 2007 +0000
@@ -167,6 +167,7 @@
   unsigned trackNN;   // how many track NNs ?
   unsigned sequenceLength;
   unsigned sequenceHop;
+  bool normalizedDistance;
   unsigned queryPoint;
   unsigned usingQueryPoint;
   unsigned usingTimes;
@@ -276,6 +277,7 @@
   trackNN(O2_DEFAULT_TRACKNN), \
   sequenceLength(16), \
   sequenceHop(1), \
+  normalizedDistance(true), \
   queryPoint(0), \
   usingQueryPoint(0), \
   usingTimes(0), \
--- a/query.cpp	Mon Dec 10 18:25:55 2007 +0000
+++ b/query.cpp	Mon Dec 10 20:01:43 2007 +0000
@@ -27,6 +27,10 @@
   return a.dist < b.dist;
 }
 
+bool operator> (const NNresult &a, const NNresult &b) { // greater-than on dist only; this is the operator std::greater<NNresult> invokes
+  return a.dist > b.dist;
+}
+
 bool operator<= (const NNresult &a, const NNresult &b) {
   return a.dist <= b.dist;
 }
@@ -39,9 +43,190 @@
 public:
   virtual ~Reporter() {};
   virtual void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) = 0;
+  // FIXME: this interface is a bit wacky: a relic of previous, more
+  // confused times.  Really it might make sense to have separate
+  // reporter classes for WS and for stdout, rather than passing this
+  // adbQueryResponse thing everywhere; the fileTable argument is
+  // there solely for converting trackIDs into names.  -- CSR,
+  // 2007-12-10.
   virtual void report(char *fileTable, adb__queryResponse *adbQueryResponse) = 0;
 };
 
+class pointQueryReporter : public Reporter { // Reporter that keeps one bounded queue of matches for the whole query
+public:
+  pointQueryReporter(unsigned int pointNN); // pointNN: maximum number of matches retained
+  ~pointQueryReporter();
+  void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist); // record one candidate match
+  void report(char *fileTable, adb__queryResponse *adbQueryResponse); // emit the retained matches, largest dist first
+private:
+  unsigned int pointNN; // size bound enforced in add_point
+  std::priority_queue< NNresult, std::vector< NNresult >, std::greater< NNresult > > *queue; // std::greater ordering, so top() is the smallest-dist entry; allocated in the constructor
+};
+
+pointQueryReporter::pointQueryReporter(unsigned int pointNN) // Store the bound and create the empty queue
+  : pointNN(pointNN) { // member initialised from the same-named parameter
+  queue = new std::priority_queue< NNresult, std::vector< NNresult >, std::greater< NNresult > >; // raw owning pointer; deleted in the destructor
+}
+
+pointQueryReporter::~pointQueryReporter() { // Release the queue allocated in the constructor
+  delete queue; // scalar delete, matching the scalar new
+}
+
+void pointQueryReporter::add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) { // Record one match, keeping at most pointNN entries
+  NNresult r;
+  r.trackID = trackID;
+  r.qpos = qpos;
+  r.spos = spos;
+  r.dist = dist;
+  queue->push(r); // always insert first, then trim
+  if(queue->size() > pointNN) { // one entry over the bound
+    queue->pop(); // drops top(), i.e. the smallest-dist entry under std::greater ordering
+  }
+}
+
+void pointQueryReporter::report(char *fileTable, adb__queryResponse *adbQueryResponse) { // Drain the queue and emit results, largest dist first, to stdout or into adbQueryResponse
+  NNresult r;
+  std::vector<NNresult> v;
+  unsigned int size = queue->size(); // taken before popping; also used as the result count below
+  for(unsigned int k = 0; k < size; k++) {
+    r = queue->top(); // smallest remaining dist
+    v.push_back(r); // so v ends up in ascending dist order
+    queue->pop(); // the queue is empty once this loop finishes
+  }
+  std::vector<NNresult>::reverse_iterator rit; // v is walked backwards below, giving descending dist
+      
+  if(adbQueryResponse==0) { // no response struct supplied: print to stdout
+    for(rit = v.rbegin(); rit < v.rend(); rit++) {
+      r = *rit;
+      std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " "; // name is read as a C string at byte offset trackID*O2_FILETABLESIZE in fileTable
+      std::cout << r.dist << " " << r.qpos << " " << r.spos << std::endl;
+    }
+  } else { // otherwise fill the response's parallel arrays
+    adbQueryResponse->result.__sizeRlist=size;
+    adbQueryResponse->result.__sizeDist=size;
+    adbQueryResponse->result.__sizeQpos=size;
+    adbQueryResponse->result.__sizeSpos=size;
+    adbQueryResponse->result.Rlist= new char*[size]; // these four arrays are allocated here and not freed in this function
+    adbQueryResponse->result.Dist = new double[size];
+    adbQueryResponse->result.Qpos = new unsigned int[size];
+    adbQueryResponse->result.Spos = new unsigned int[size];
+    unsigned int k = 0; // output index, advanced together with rit
+    for(rit = v.rbegin(); rit < v.rend(); rit++, k++) {
+      r = *rit;
+      adbQueryResponse->result.Rlist[k] = new char[O2_MAXFILESTR]; // per-result name buffer, also not freed here
+      adbQueryResponse->result.Dist[k] = r.dist;
+      adbQueryResponse->result.Qpos[k] = r.qpos;
+      adbQueryResponse->result.Spos[k] = r.spos;
+      snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLESIZE); // copy of the track name, bounded by the buffer size
+    }
+  }
+}
+
+// FIXME: this trackPointQueryReporter is actually an almost
+// word-for-word copy of trackSequenceQueryNNReporter, below: the only
+// difference is that all priority queues use std::greater<NNresult>,
+// rather than the implicit default std::less<NNresult>.  There's
+// probably some clever C++ way of expressing that; find out and
+// delete one copy of the code.
+class trackPointQueryReporter : public Reporter { // Reporter with one bounded queue per track; report() reduces each queue to a single mean-dist result
+public:
+  trackPointQueryReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles); // numFiles sets how many queues are allocated
+  ~trackPointQueryReporter();
+  void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist); // record one match in that track's queue
+  void report(char *fileTable, adb__queryResponse *adbQueryResponse); // emit up to trackNN per-track results, largest mean dist first
+private:
+  unsigned int pointNN; // bound on each per-track queue
+  unsigned int trackNN; // bound on the number of tracks reported
+  unsigned int numFiles; // length of the queues array
+  std::priority_queue< NNresult, std::vector< NNresult>, std::greater< NNresult > > *queues; // array indexed by trackID; std::greater ordering, so top() is the smallest-dist entry
+};
+
+trackPointQueryReporter::trackPointQueryReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles) // Store the bounds and allocate one empty queue per file
+  : pointNN(pointNN), trackNN(trackNN), numFiles(numFiles) { // members initialised from the same-named parameters
+  queues = new std::priority_queue< NNresult, std::vector< NNresult>, std::greater< NNresult > >[numFiles]; // array new; released with delete [] in the destructor
+}
+
+trackPointQueryReporter::~trackPointQueryReporter() { // Release the per-track queue array
+  delete [] queues; // array delete, matching the array new in the constructor
+}
+
+void trackPointQueryReporter::add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) { // Record one match in its track's queue, keeping at most pointNN per track
+  NNresult r;
+  r.trackID = trackID;
+  r.qpos = qpos;
+  r.spos = spos;
+  r.dist = dist;
+  queues[trackID].push(r); // trackID indexes the array directly; no range check against numFiles is made here
+  if(queues[trackID].size() > pointNN) { // one entry over the per-track bound
+    queues[trackID].pop(); // drops that track's smallest-dist entry (std::greater ordering)
+  }
+}
+
+void trackPointQueryReporter::report(char *fileTable, adb__queryResponse *adbQueryResponse) { // Reduce each track's queue to one mean-dist result, keep at most trackNN of them, then emit largest first
+  std::priority_queue < NNresult, std::vector< NNresult>, std::greater< NNresult > > result; // per-track summaries; top() is the smallest mean dist
+  for (int i = numFiles-1; i >= 0; i--) { // signed counter so the countdown can stop below zero; NOTE(review): assumes numFiles fits in int
+    unsigned int size = queues[i].size();
+    if (size > 0) { // tracks with no recorded matches contribute nothing
+      NNresult r;
+      double dist = 0; // running sum of this track's dists
+      NNresult oldr = queues[i].top(); // remembered entry used by the tie handling below
+      for (unsigned int j = 0; j < size; j++) {
+        r = queues[i].top(); // entries come out in ascending dist order
+        dist += r.dist;
+        queues[i].pop(); // the per-track queue is emptied by this loop
+        if (r.dist == oldr.dist) { // same dist as the remembered entry: carry its qpos/spos forward
+          r.qpos = oldr.qpos;
+          r.spos = oldr.spos;
+        } else { // strictly different dist: this entry becomes the remembered one
+          oldr = r;
+        }
+      }
+      dist /= size; // mean over the entries that were in the queue
+      r.dist = dist; // trackID, qpos and spos are magically right already.
+      result.push(r);
+      if (result.size() > trackNN) { // keep at most trackNN track summaries
+        result.pop(); // drops the summary with the smallest mean dist
+      }
+    }
+  }
+
+  NNresult r;
+  std::vector<NNresult> v;
+  unsigned int size = result.size(); // taken before popping; also used as the result count below
+  for(unsigned int k = 0; k < size; k++) {
+    r = result.top(); // smallest remaining mean dist
+    v.push_back(r); // so v ends up in ascending order
+    result.pop();
+  }
+  std::vector<NNresult>::reverse_iterator rit; // v is walked backwards below, giving descending dist
+      
+  if(adbQueryResponse==0) { // no response struct supplied: print to stdout
+    for(rit = v.rbegin(); rit < v.rend(); rit++) {
+      r = *rit;
+      std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " "; // name is read as a C string at byte offset trackID*O2_FILETABLESIZE in fileTable
+      std::cout << r.dist << " " << r.qpos << " " << r.spos << std::endl;
+    }
+  } else { // otherwise fill the response's parallel arrays
+    adbQueryResponse->result.__sizeRlist=size;
+    adbQueryResponse->result.__sizeDist=size;
+    adbQueryResponse->result.__sizeQpos=size;
+    adbQueryResponse->result.__sizeSpos=size;
+    adbQueryResponse->result.Rlist= new char*[size]; // these four arrays are allocated here and not freed in this function
+    adbQueryResponse->result.Dist = new double[size];
+    adbQueryResponse->result.Qpos = new unsigned int[size];
+    adbQueryResponse->result.Spos = new unsigned int[size];
+    unsigned int k = 0; // output index, advanced together with rit
+    for(rit = v.rbegin(); rit < v.rend(); rit++, k++) {
+      r = *rit;
+      adbQueryResponse->result.Rlist[k] = new char[O2_MAXFILESTR]; // per-result name buffer, also not freed here
+      adbQueryResponse->result.Dist[k] = r.dist;
+      adbQueryResponse->result.Qpos[k] = r.qpos;
+      adbQueryResponse->result.Spos[k] = r.spos;
+      snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLESIZE); // copy of the track name, bounded by the buffer size
+    }
+  }
+}
+
 class trackSequenceQueryNNReporter : public Reporter {
 public:
   trackSequenceQueryNNReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles);
@@ -229,22 +414,33 @@
 
 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
   initTables(dbName, inFile);
-
+  Reporter *r = 0; // stays null unless one of the cases below assigns it
   switch (queryType) {
+  case O2_POINT_QUERY: // run as a sequence search over length-1 sequences
+    sequenceLength = 1;
+    normalizedDistance = false; // member is overwritten here and not restored in this function
+    r = new pointQueryReporter(pointNN); // single queue over all tracks
+    trackSequenceQueryNN(dbName, inFile, r);
+    break;
+  case O2_TRACK_QUERY: // same search settings; per-track aggregation is done by the reporter
+    sequenceLength = 1;
+    normalizedDistance = false; // member is overwritten here and not restored in this function
+    r = new trackPointQueryReporter(pointNN, trackNN, dbH->numFiles); // one queue per file in the database
+    trackSequenceQueryNN(dbName, inFile, r);
+    break;
   case O2_SEQUENCE_QUERY:
-    Reporter *r;
     if(radius == 0) {
       r = new trackSequenceQueryNNReporter(pointNN, trackNN, dbH->numFiles);
     } else {
       r = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles);
     }
     trackSequenceQueryNN(dbName, inFile, r);
-    r->report(fileTable, adbQueryResponse);
-    delete r;
     break;
   default:
     error("unrecognized queryType in query()");
   }  
+  r->report(fileTable, adbQueryResponse); // NOTE(review): r is still null here if error() in the default case returns; confirm error() never returns
+  delete r; // the reporter is owned by this function for every query type
 }
 
 // return ordinal position of key in keyTable
@@ -563,9 +759,9 @@
   off_t *trackOffsetTable = new off_t[dbH->numFiles];
   unsigned cumTrack=0;
   off_t trackIndexOffset;
-  for(k=0; k<dbH->numFiles;k++){
-    trackOffsetTable[k]=cumTrack;
-    cumTrack+=trackTable[k]*dbH->dim;
+  for(k = 0; k < dbH->numFiles; k++){
+    trackOffsetTable[k] = cumTrack;
+    cumTrack += trackTable[k] * dbH->dim;
   }
 
   char nextKey[MAXSTR];
@@ -610,9 +806,14 @@
         }
 
 	// Search for minimum distance by shingles (concatenated vectors)
-	for(j=0;j<=numVectors-wL;j+=HOP_SIZE) {
-	  for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
-	    double thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
+	for(j = 0; j <= numVectors - wL; j += HOP_SIZE) {
+	  for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) {
+            double thisDist;
+            if(normalizedDistance) {
+              thisDist = 2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
+            } else {
+              thisDist = DD[j][k];
+            }
 	    // Power test
 	    if ((!usingPower) || powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k])) {
               // radius test
--- a/tests/0003/run-test.sh	Mon Dec 10 18:25:55 2007 +0000
+++ b/tests/0003/run-test.sh	Mon Dec 10 20:01:43 2007 +0000
@@ -1,13 +1,14 @@
 #! /bin/sh
 
-exit 14
-
 . ../test-utils.sh
 
 if [ -f testdb ]; then rm -f testdb; fi
 
 ${AUDIODB} -d testdb -N
 
+# point query now implemented as sequence search
+${AUDIODB} -d testdb -L
+
 # We could contemplate putting the test feature (and the expected
 # query output) under svn control if we trust its binary file
 # handling.
--- a/tests/0004/run-test.sh	Mon Dec 10 18:25:55 2007 +0000
+++ b/tests/0004/run-test.sh	Mon Dec 10 20:01:43 2007 +0000
@@ -1,13 +1,13 @@
 #! /bin/sh
 
-exit 14
-
 . ../test-utils.sh
 
 if [ -f testdb ]; then rm -f testdb; fi
 
 ${AUDIODB} -d testdb -N
 
+${AUDIODB} -d testdb -L
+
 intstring 2 > testfeature
 floatstring 0 1 >> testfeature
 floatstring 1 0 >> testfeature
--- a/tests/0009/run-test.sh	Mon Dec 10 18:25:55 2007 +0000
+++ b/tests/0009/run-test.sh	Mon Dec 10 20:01:43 2007 +0000
@@ -1,13 +1,13 @@
 #! /bin/sh
 
-exit 14
-
 . ../test-utils.sh
 
 if [ -f testdb ]; then rm -f testdb; fi
 
 ${AUDIODB} -d testdb -N
 
+${AUDIODB} -d testdb -L
+
 intstring 2 > testfeature01
 floatstring 0 1 >> testfeature01
 intstring 2 > testfeature10
--- a/tests/0014/run-test.sh	Mon Dec 10 18:25:55 2007 +0000
+++ b/tests/0014/run-test.sh	Mon Dec 10 20:01:43 2007 +0000
@@ -1,13 +1,13 @@
 #! /bin/sh
 
-exit 14
-
 . ../test-utils.sh
 
 if [ -f testdb ]; then rm -f testdb; fi
 
 ${AUDIODB} -d testdb -N
 
+${AUDIODB} -d testdb -L
+
 intstring 2 > testfeature
 floatstring 0 1 >> testfeature
 floatstring 1 0 >> testfeature
--- a/tests/0019/run-test.sh	Mon Dec 10 18:25:55 2007 +0000
+++ b/tests/0019/run-test.sh	Mon Dec 10 20:01:43 2007 +0000
@@ -1,13 +1,13 @@
 #! /bin/sh
 
-exit 14
-
 . ../test-utils.sh
 
 if [ -f testdb ]; then rm -f testdb; fi
 
 ${AUDIODB} -d testdb -N
 
+${AUDIODB} -d testdb -L
+
 intstring 2 > testfeature01
 floatstring 0 1 >> testfeature01
 intstring 2 > testfeature10
--- a/tests/0032/run-test.sh	Mon Dec 10 18:25:55 2007 +0000
+++ b/tests/0032/run-test.sh	Mon Dec 10 20:01:43 2007 +0000
@@ -1,13 +1,13 @@
 #! /bin/sh
 
-exit 14
-
 . ../test-utils.sh
 
 if [ -f testdb ]; then rm -f testdb; fi
 
 ${AUDIODB} -d testdb -N
 
+${AUDIODB} -d testdb -L
+
 intstring 2 > testfeature01
 floatstring 0 1 >> testfeature01
 intstring 2 > testfeature10