changeset 263:210b2f661b88

Added new query type "onetoonensequence" for matching each query sequence to the single closest match in the database. Most useful if query is not also in the database.
author mas01mc
date Mon, 09 Jun 2008 19:20:39 +0000
parents 34ce7f7a177d
children 1cec738101a8
files audioDB.cpp audioDB.h gengetopt.in query.cpp reporter.h
diffstat 5 files changed, 110 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB.cpp	Mon May 19 16:25:12 2008 +0000
+++ b/audioDB.cpp	Mon Jun 09 19:20:39 2008 +0000
@@ -330,6 +330,8 @@
       queryType=O2_SEQUENCE_QUERY;
     else if(strncmp(args_info.QUERY_arg, "nsequence", MAXSTR)==0)
       queryType=O2_N_SEQUENCE_QUERY;
+    else if(strncmp(args_info.QUERY_arg, "onetoonensequence", MAXSTR)==0)
+      queryType=O2_ONE_TO_ONE_N_SEQUENCE_QUERY;
     else
       error("unsupported query type",args_info.QUERY_arg);
     
@@ -341,12 +343,12 @@
     }
     
     pointNN = args_info.pointnn_arg;
-    if(pointNN < 1 || pointNN > 1000) {
-      error("pointNN out of range: 1 <= pointNN <= 1000");
+    if(pointNN < 1 || pointNN > O2_MAXNN) {
+      error("pointNN out of range: 1 <= pointNN <= 1000000");
     }
     trackNN = args_info.resultlength_arg;
-    if(trackNN < 1 || trackNN > 1000) {
-      error("resultlength out of range: 1 <= resultlength <= 1000");
+    if(trackNN < 1 || trackNN > O2_MAXNN) {
+      error("resultlength out of range: 1 <= resultlength <= 1000000");
     }
     sequenceLength = args_info.sequencelength_arg;
     if(sequenceLength < 1 || sequenceLength > 1000) {
--- a/audioDB.h	Mon May 19 16:25:12 2008 +0000
+++ b/audioDB.h	Mon Jun 09 19:20:39 2008 +0000
@@ -68,7 +68,7 @@
 #define O2_HEADERSIZE (sizeof(dbTableHeaderT))
 #define O2_MEANNUMVECTORS (1000U)
 #define O2_MAXDIM (1000U)
-#define O2_MAXNN (10000U)
+#define O2_MAXNN (1000000U)
 
 // Flags
 #define O2_FLAG_L2NORM (0x1U)
@@ -81,6 +81,7 @@
 #define O2_SEQUENCE_QUERY (0x8U)
 #define O2_TRACK_QUERY (0x10U)
 #define O2_N_SEQUENCE_QUERY (0x20U)
+#define O2_ONE_TO_ONE_N_SEQUENCE_QUERY (0x40U)
 
 
 // Error Codes
--- a/gengetopt.in	Mon May 19 16:25:12 2008 +0000
+++ b/gengetopt.in	Mon Jun 09 19:20:39 2008 +0000
@@ -42,7 +42,7 @@
 
 section "Database Search" sectiondesc="Thse commands control the retrieval behaviour.\n"
 
-option "QUERY" Q "content-based search on --database using --features as a query. Optionally restrict the search to those tracks identified in a --keyList." values="point","track","sequence", "nsequence" typestr="searchtype" dependon="database" dependon="features" optional
+option "QUERY" Q "content-based search on --database using --features as a query. Optionally restrict the search to those tracks identified in a --keyList." values="point","track","sequence","nsequence","onetoonensequence" typestr="searchtype" dependon="database" dependon="features" optional
 option "qpoint" p "ordinal position of query start point in --features file." int typestr="position" default="0" optional
 option "exhaustive" e "exhaustive search: iterate through all query vectors in search. Overrides --qpoint." flag off optional hidden
 option "pointnn" n "number of point nearest neighbours to use in retrieval." int typestr="numpoints" default="10" optional
--- a/query.cpp	Mon May 19 16:25:12 2008 +0000
+++ b/query.cpp	Mon Jun 09 19:20:39 2008 +0000
@@ -44,6 +44,13 @@
       r = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles);
     }
     break;
+  case O2_ONE_TO_ONE_N_SEQUENCE_QUERY :
+    if(radius == 0) {
+      error("query-type not yet supported");
+    } else {
+      r = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, dbH->numFiles);
+    }
+    break;
   default:
     error("unrecognized queryType in query()");
   }  
--- a/reporter.h	Mon May 19 16:25:12 2008 +0000
+++ b/reporter.h	Mon Jun 09 19:20:39 2008 +0000
@@ -3,8 +3,6 @@
 #include <set>
 #include <functional>
 
-#define MIN_ARG(a,b) a<b?a:b
-
 typedef struct nnresult {
   unsigned int trackID;
   double dist;
@@ -482,3 +480,97 @@
     // FIXME
   }
 }
+
+
+
+
+
+/****************** EXPERIMENTAL REPORTERS ***************/
+
+
+
+
+
+
+// One-to-one track sequence query reporter (radius search, experimental).
+//
+// add_point() keeps, for every query position (qpos), only the candidate
+// match with the smallest distance, and counts how many distinct
+// <trackID,qpos> pairs were seen for each track.
+// report() prints one line per entry of point_queue:
+//   <dist> <qpos> <spos> <file table entry of the matched track>
+//
+// NOTE(review): the class holds raw pointers and declares no copy
+// constructor or assignment operator, so instances should not be copied.
+class trackSequenceQueryRadNNReporterOneToOne : public Reporter { 
+public:
+  trackSequenceQueryRadNNReporterOneToOne(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles);
+  ~trackSequenceQueryRadNNReporterOneToOne();
+  // Offer one candidate match (query position qpos, track position spos, distance dist)
+  void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist);
+  // Write the retained matches to std::cout when adbQueryResponse is 0
+  void report(char *fileTable, adb__queryResponse *adbQueryResponse);
+ protected:
+  // pointNN and trackNN are stored by the constructor; no member function
+  // of this class visible here reads them.
+  unsigned int pointNN;
+  unsigned int trackNN;
+  // Length of the count array
+  unsigned int numFiles;
+  // <trackID,qpos> pairs that have already been counted
+  std::set< NNresult > *set;
+  // Closest match found so far for each query position, indexed by qpos
+  std::vector< NNresult> *point_queue;
+  // count[trackID]: number of distinct <trackID,qpos> pairs seen for that track
+  unsigned int *count;
+
+};
+
+// Build an empty reporter: no <trackID,qpos> pairs recorded, no point
+// matches stored, and one zeroed match counter per database file.
+trackSequenceQueryRadNNReporterOneToOne::trackSequenceQueryRadNNReporterOneToOne(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles):
+pointNN(pointNN), trackNN(trackNN), numFiles(numFiles),
+// Set of <trackID,qpos> pairs already counted
+set(new std::set< NNresult >),
+// Best match per query position, filled in by add_point()
+point_queue(new std::vector< NNresult >),
+// The trailing () value-initializes the array, so every counter starts at 0
+count(new unsigned int[numFiles]()) {
+}
+
+// Release every heap object allocated by the constructor.
+trackSequenceQueryRadNNReporterOneToOne::~trackSequenceQueryRadNNReporterOneToOne() {
+  delete set;
+  // point_queue is created with new in the constructor, so it has to be
+  // freed here as well; otherwise every reporter leaks its match vector.
+  delete point_queue;
+  delete [] count;
+}
+
+// Offer one candidate match between query position qpos and position spos
+// of track trackID, at distance dist.
+//
+// Two pieces of state are updated:
+//  - count[trackID] is incremented the first time a given <trackID,qpos>
+//    pair is offered (membership is tracked in `set`);
+//  - (*point_queue)[qpos] keeps the candidate with the smallest dist seen
+//    so far for that query position (greedy local one-to-one algorithm).
+//
+// trackID is used to index count directly and is not range-checked here.
+void trackSequenceQueryRadNNReporterOneToOne::add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) {
+  NNresult r;
+  r.qpos = qpos;
+  r.trackID = trackID;
+
+  // Track insertion count <trackID,qpos> pairs.
+  // insert() reports whether the pair was new, so no separate find() is needed.
+  if ( set->insert(r).second ) {
+    count[trackID]++;
+  }
+
+  // Point insertion
+  r.spos = spos;
+  r.dist = dist;
+
+  if(point_queue->size() <= r.qpos){
+    // Every slot created by this growth must start at the "no match yet"
+    // distance (1e6). Marking only the last slot would leave the others at
+    // dist == 0, and they would then reject every later candidate for
+    // their query position.
+    NNresult unmatched = NNresult();
+    unmatched.dist = 1e6;
+    point_queue->resize( r.qpos + 1, unmatched );
+  }
+
+  // Keep the <qpos> result with the smallest <dist> value
+  if (r.dist < (*point_queue)[r.qpos].dist)
+    (*point_queue)[r.qpos] = r;
+
+}
+
+// Print the retained matches, one line per entry of point_queue in index
+// order, formatted as:
+//   <dist> <qpos> <spos> <file table entry for trackID>
+// Output is produced only when adbQueryResponse is 0; filling in a response
+// object is not implemented yet.
+void trackSequenceQueryRadNNReporterOneToOne::report(char *fileTable, adb__queryResponse *adbQueryResponse) {
+  if(adbQueryResponse!=0) {
+    // FIXME
+    return;
+  }
+
+  for(std::vector< NNresult >::size_type i = 0; i < point_queue->size(); ++i) {
+    const NNresult &match = (*point_queue)[i];
+    // Track names are looked up at trackID * O2_FILETABLE_ENTRY_SIZE bytes into fileTable
+    const char *trackName = fileTable + match.trackID*O2_FILETABLE_ENTRY_SIZE;
+    std::cout << match.dist << " "
+              << match.qpos << " "
+              << match.spos << " "
+              << trackName
+              << std::endl;
+  }
+}