# HG changeset patch # User mas01cr # Date 1197316903 0 # Node ID c76cdcf096fd70fc2c3b8df0c2f7f082a4e1e010 # Parent a5d2fa43accfb6fab5a2b2190d227b1304c7b714 Proof of Reporter concept: restore pointQuery and trackPointQuery Only now it's "obvious" how they are the same and how they differ from the basic loop: * normalizedDistance is the only new feature added; * all the rest is in reporter stuff... * ... except that we have to turn on L2Norm, even if we don't use it. Alter the tests to cope with this fact, l2norming the point- and track-search databases. diff -r a5d2fa43accf -r c76cdcf096fd audioDB.h --- a/audioDB.h Mon Dec 10 18:25:55 2007 +0000 +++ b/audioDB.h Mon Dec 10 20:01:43 2007 +0000 @@ -167,6 +167,7 @@ unsigned trackNN; // how many track NNs ? unsigned sequenceLength; unsigned sequenceHop; + bool normalizedDistance; unsigned queryPoint; unsigned usingQueryPoint; unsigned usingTimes; @@ -276,6 +277,7 @@ trackNN(O2_DEFAULT_TRACKNN), \ sequenceLength(16), \ sequenceHop(1), \ + normalizedDistance(true), \ queryPoint(0), \ usingQueryPoint(0), \ usingTimes(0), \ diff -r a5d2fa43accf -r c76cdcf096fd query.cpp --- a/query.cpp Mon Dec 10 18:25:55 2007 +0000 +++ b/query.cpp Mon Dec 10 20:01:43 2007 +0000 @@ -27,6 +27,10 @@ return a.dist < b.dist; } +bool operator> (const NNresult &a, const NNresult &b) { + return a.dist > b.dist; +} + bool operator<= (const NNresult &a, const NNresult &b) { return a.dist <= b.dist; } @@ -39,9 +43,190 @@ public: virtual ~Reporter() {}; virtual void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) = 0; + // FIXME: this interface is a bit wacky: a relic of previous, more + // confused times. Really it might make sense to have separate + // reporter classes for WS and for stdout, rather than passing this + // adbQueryResponse thing everywhere; the fileTable argument is + // there solely for converting trackIDs into names. -- CSR, + // 2007-12-10. 
virtual void report(char *fileTable, adb__queryResponse *adbQueryResponse) = 0; }; +class pointQueryReporter : public Reporter { +public: + pointQueryReporter(unsigned int pointNN); + ~pointQueryReporter(); + void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist); + void report(char *fileTable, adb__queryResponse *adbQueryResponse); +private: + unsigned int pointNN; + std::priority_queue< NNresult, std::vector< NNresult >, std::greater< NNresult > > *queue; +}; + +pointQueryReporter::pointQueryReporter(unsigned int pointNN) + : pointNN(pointNN) { + queue = new std::priority_queue< NNresult, std::vector< NNresult >, std::greater< NNresult > >; +} + +pointQueryReporter::~pointQueryReporter() { + delete queue; +} + +void pointQueryReporter::add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) { + NNresult r; + r.trackID = trackID; + r.qpos = qpos; + r.spos = spos; + r.dist = dist; + queue->push(r); + if(queue->size() > pointNN) { + queue->pop(); + } +} + +void pointQueryReporter::report(char *fileTable, adb__queryResponse *adbQueryResponse) { + NNresult r; + std::vector<NNresult> v; + unsigned int size = queue->size(); + for(unsigned int k = 0; k < size; k++) { + r = queue->top(); + v.push_back(r); + queue->pop(); + } + std::vector<NNresult>::reverse_iterator rit; + + if(adbQueryResponse==0) { + for(rit = v.rbegin(); rit < v.rend(); rit++) { + r = *rit; + std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " "; + std::cout << r.dist << " " << r.qpos << " " << r.spos << std::endl; + } + } else { + adbQueryResponse->result.__sizeRlist=size; + adbQueryResponse->result.__sizeDist=size; + adbQueryResponse->result.__sizeQpos=size; + adbQueryResponse->result.__sizeSpos=size; + adbQueryResponse->result.Rlist= new char*[size]; + adbQueryResponse->result.Dist = new double[size]; + adbQueryResponse->result.Qpos = new unsigned int[size]; + adbQueryResponse->result.Spos = new unsigned int[size]; + unsigned int k = 0; + for(rit = 
v.rbegin(); rit < v.rend(); rit++, k++) { + r = *rit; + adbQueryResponse->result.Rlist[k] = new char[O2_MAXFILESTR]; + adbQueryResponse->result.Dist[k] = r.dist; + adbQueryResponse->result.Qpos[k] = r.qpos; + adbQueryResponse->result.Spos[k] = r.spos; + snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLESIZE); + } + } +} + +// FIXME: this trackPointQueryReporter is actually an almost +// word-for-word copy of trackSequenceQueryNNReporter, below: the only +// difference is that all priority queues use std::greater<NNresult>, +// rather than the implicit default std::less<NNresult>. There's +// probably some clever C++ way of expressing that; find out and +// delete one copy of the code. +class trackPointQueryReporter : public Reporter { +public: + trackPointQueryReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles); + ~trackPointQueryReporter(); + void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist); + void report(char *fileTable, adb__queryResponse *adbQueryResponse); +private: + unsigned int pointNN; + unsigned int trackNN; + unsigned int numFiles; + std::priority_queue< NNresult, std::vector< NNresult>, std::greater< NNresult > > *queues; +}; + +trackPointQueryReporter::trackPointQueryReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles) + : pointNN(pointNN), trackNN(trackNN), numFiles(numFiles) { + queues = new std::priority_queue< NNresult, std::vector< NNresult>, std::greater< NNresult > >[numFiles]; +} + +trackPointQueryReporter::~trackPointQueryReporter() { + delete [] queues; +} + +void trackPointQueryReporter::add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) { + NNresult r; + r.trackID = trackID; + r.qpos = qpos; + r.spos = spos; + r.dist = dist; + queues[trackID].push(r); + if(queues[trackID].size() > pointNN) { + queues[trackID].pop(); + } +} + +void trackPointQueryReporter::report(char *fileTable, adb__queryResponse 
*adbQueryResponse) { + std::priority_queue < NNresult, std::vector< NNresult>, std::greater< NNresult > > result; + for (int i = numFiles-1; i >= 0; i--) { + unsigned int size = queues[i].size(); + if (size > 0) { + NNresult r; + double dist = 0; + NNresult oldr = queues[i].top(); + for (unsigned int j = 0; j < size; j++) { + r = queues[i].top(); + dist += r.dist; + queues[i].pop(); + if (r.dist == oldr.dist) { + r.qpos = oldr.qpos; + r.spos = oldr.spos; + } else { + oldr = r; + } + } + dist /= size; + r.dist = dist; // trackID, qpos and spos are magically right already. + result.push(r); + if (result.size() > trackNN) { + result.pop(); + } + } + } + + NNresult r; + std::vector<NNresult> v; + unsigned int size = result.size(); + for(unsigned int k = 0; k < size; k++) { + r = result.top(); + v.push_back(r); + result.pop(); + } + std::vector<NNresult>::reverse_iterator rit; + + if(adbQueryResponse==0) { + for(rit = v.rbegin(); rit < v.rend(); rit++) { + r = *rit; + std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " "; + std::cout << r.dist << " " << r.qpos << " " << r.spos << std::endl; + } + } else { + adbQueryResponse->result.__sizeRlist=size; + adbQueryResponse->result.__sizeDist=size; + adbQueryResponse->result.__sizeQpos=size; + adbQueryResponse->result.__sizeSpos=size; + adbQueryResponse->result.Rlist= new char*[size]; + adbQueryResponse->result.Dist = new double[size]; + adbQueryResponse->result.Qpos = new unsigned int[size]; + adbQueryResponse->result.Spos = new unsigned int[size]; + unsigned int k = 0; + for(rit = v.rbegin(); rit < v.rend(); rit++, k++) { + r = *rit; + adbQueryResponse->result.Rlist[k] = new char[O2_MAXFILESTR]; + adbQueryResponse->result.Dist[k] = r.dist; + adbQueryResponse->result.Qpos[k] = r.qpos; + adbQueryResponse->result.Spos[k] = r.spos; + snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLESIZE); + } + } +} + class trackSequenceQueryNNReporter : public Reporter { public: 
trackSequenceQueryNNReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles); @@ -229,22 +414,33 @@ void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { initTables(dbName, inFile); - + Reporter *r = 0; switch (queryType) { + case O2_POINT_QUERY: + sequenceLength = 1; + normalizedDistance = false; + r = new pointQueryReporter(pointNN); + trackSequenceQueryNN(dbName, inFile, r); + break; + case O2_TRACK_QUERY: + sequenceLength = 1; + normalizedDistance = false; + r = new trackPointQueryReporter(pointNN, trackNN, dbH->numFiles); + trackSequenceQueryNN(dbName, inFile, r); + break; case O2_SEQUENCE_QUERY: - Reporter *r; if(radius == 0) { r = new trackSequenceQueryNNReporter(pointNN, trackNN, dbH->numFiles); } else { r = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles); } trackSequenceQueryNN(dbName, inFile, r); - r->report(fileTable, adbQueryResponse); - delete r; break; default: error("unrecognized queryType in query()"); } + r->report(fileTable, adbQueryResponse); + delete r; } // return ordinal position of key in keyTable @@ -563,9 +759,9 @@ off_t *trackOffsetTable = new off_t[dbH->numFiles]; unsigned cumTrack=0; off_t trackIndexOffset; - for(k=0; k<dbH->numFiles;k++){ - trackOffsetTable[k]=cumTrack; - cumTrack+=trackTable[k]*dbH->dim; + for(k = 0; k < dbH->numFiles; k++){ + trackOffsetTable[k] = cumTrack; + cumTrack += trackTable[k] * dbH->dim; } char nextKey[MAXSTR]; @@ -610,9 +806,14 @@ } // Search for minimum distance by shingles (concatenated vectors) - for(j=0;j<=numVectors-wL;j+=HOP_SIZE) { - for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ - double thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; + for(j = 0; j <= numVectors - wL; j += HOP_SIZE) { + for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) { + double thisDist; + if(normalizedDistance) { + thisDist = 2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; + } else { + thisDist = DD[j][k]; + } // Power test if 
((!usingPower) || powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k])) { // radius test diff -r a5d2fa43accf -r c76cdcf096fd tests/0003/run-test.sh --- a/tests/0003/run-test.sh Mon Dec 10 18:25:55 2007 +0000 +++ b/tests/0003/run-test.sh Mon Dec 10 20:01:43 2007 +0000 @@ -1,13 +1,14 @@ #! /bin/sh -exit 14 - . ../test-utils.sh if [ -f testdb ]; then rm -f testdb; fi ${AUDIODB} -d testdb -N +# point query now implemented as sequence search +${AUDIODB} -d testdb -L + # We could contemplate putting the test feature (and the expected # query output) under svn control if we trust its binary file # handling. diff -r a5d2fa43accf -r c76cdcf096fd tests/0004/run-test.sh --- a/tests/0004/run-test.sh Mon Dec 10 18:25:55 2007 +0000 +++ b/tests/0004/run-test.sh Mon Dec 10 20:01:43 2007 +0000 @@ -1,13 +1,13 @@ #! /bin/sh -exit 14 - . ../test-utils.sh if [ -f testdb ]; then rm -f testdb; fi ${AUDIODB} -d testdb -N +${AUDIODB} -d testdb -L + intstring 2 > testfeature floatstring 0 1 >> testfeature floatstring 1 0 >> testfeature diff -r a5d2fa43accf -r c76cdcf096fd tests/0009/run-test.sh --- a/tests/0009/run-test.sh Mon Dec 10 18:25:55 2007 +0000 +++ b/tests/0009/run-test.sh Mon Dec 10 20:01:43 2007 +0000 @@ -1,13 +1,13 @@ #! /bin/sh -exit 14 - . ../test-utils.sh if [ -f testdb ]; then rm -f testdb; fi ${AUDIODB} -d testdb -N +${AUDIODB} -d testdb -L + intstring 2 > testfeature01 floatstring 0 1 >> testfeature01 intstring 2 > testfeature10 diff -r a5d2fa43accf -r c76cdcf096fd tests/0014/run-test.sh --- a/tests/0014/run-test.sh Mon Dec 10 18:25:55 2007 +0000 +++ b/tests/0014/run-test.sh Mon Dec 10 20:01:43 2007 +0000 @@ -1,13 +1,13 @@ #! /bin/sh -exit 14 - . 
../test-utils.sh if [ -f testdb ]; then rm -f testdb; fi ${AUDIODB} -d testdb -N +${AUDIODB} -d testdb -L + intstring 2 > testfeature floatstring 0 1 >> testfeature floatstring 1 0 >> testfeature diff -r a5d2fa43accf -r c76cdcf096fd tests/0019/run-test.sh --- a/tests/0019/run-test.sh Mon Dec 10 18:25:55 2007 +0000 +++ b/tests/0019/run-test.sh Mon Dec 10 20:01:43 2007 +0000 @@ -1,13 +1,13 @@ #! /bin/sh -exit 14 - . ../test-utils.sh if [ -f testdb ]; then rm -f testdb; fi ${AUDIODB} -d testdb -N +${AUDIODB} -d testdb -L + intstring 2 > testfeature01 floatstring 0 1 >> testfeature01 intstring 2 > testfeature10 diff -r a5d2fa43accf -r c76cdcf096fd tests/0032/run-test.sh --- a/tests/0032/run-test.sh Mon Dec 10 18:25:55 2007 +0000 +++ b/tests/0032/run-test.sh Mon Dec 10 20:01:43 2007 +0000 @@ -1,13 +1,13 @@ #! /bin/sh -exit 14 - . ../test-utils.sh if [ -f testdb ]; then rm -f testdb; fi ${AUDIODB} -d testdb -N +${AUDIODB} -d testdb -L + intstring 2 > testfeature01 floatstring 0 1 >> testfeature01 intstring 2 > testfeature10