Mercurial > hg > audiodb
changeset 277:abfb26e08d9c audiodb-debian
Merge trunk changes -r326:386 into audiodb-debian branch.
Plus new debian/changelog version. (Should have used an epoch really,
but couldn't be bothered; TODO: work out a sane version numbering
policy).
line wrap: on
line diff
--- a/audioDB.cpp Mon Dec 17 16:44:37 2007 +0000 +++ b/audioDB.cpp Tue Jul 01 09:12:40 2008 +0000 @@ -136,10 +136,32 @@ } if(args_info.size_given) { + if(args_info.datasize_given) { + error("both --size and --datasize given", ""); + } + if(args_info.ntracks_given) { + error("both --size and --ntracks given", ""); + } + if(args_info.datadim_given) { + error("both --size and --datadim given", ""); + } if (args_info.size_arg < 50 || args_info.size_arg > 32000) { error("Size out of range", ""); } - size = (off_t) args_info.size_arg * 1000000; + double ratio = (double) args_info.size_arg * 1000000 / ((double) O2_DEFAULTDBSIZE); + /* FIXME: what's the safe way of doing this? */ + datasize = (unsigned int) ceil(datasize * ratio); + ntracks = (unsigned int) ceil(ntracks * ratio); + } else { + if(args_info.datasize_given) { + datasize = args_info.datasize_arg; + } + if(args_info.ntracks_given) { + ntracks = args_info.ntracks_arg; + } + if(args_info.datadim_given) { + datadim = args_info.datadim_arg; + } } if(args_info.radius_given) { @@ -306,6 +328,10 @@ queryType=O2_POINT_QUERY; else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0) queryType=O2_SEQUENCE_QUERY; + else if(strncmp(args_info.QUERY_arg, "nsequence", MAXSTR)==0) + queryType=O2_N_SEQUENCE_QUERY; + else if(strncmp(args_info.QUERY_arg, "onetoonensequence", MAXSTR)==0) + queryType=O2_ONE_TO_ONE_N_SEQUENCE_QUERY; else error("unsupported query type",args_info.QUERY_arg); @@ -317,12 +343,12 @@ } pointNN = args_info.pointnn_arg; - if(pointNN < 1 || pointNN > 1000) { - error("pointNN out of range: 1 <= pointNN <= 1000"); + if(pointNN < 1 || pointNN > O2_MAXNN) { + error("pointNN out of range: 1 <= pointNN <= 1000000"); } trackNN = args_info.resultlength_arg; - if(trackNN < 1 || trackNN > 1000) { - error("resultlength out of range: 1 <= resultlength <= 1000"); + if(trackNN < 1 || trackNN > O2_MAXNN) { + error("resultlength out of range: 1 <= resultlength <= 1000000"); } sequenceLength = args_info.sequencelength_arg; if(sequenceLength < 1 || sequenceLength > 1000) {
--- a/audioDB.h Mon Dec 17 16:44:37 2007 +0000 +++ b/audioDB.h Tue Jul 01 09:12:40 2008 +0000 @@ -54,16 +54,21 @@ #define O2_DEFAULT_POINTNN (10U) #define O2_DEFAULT_TRACKNN (10U) +//#define O2_DEFAULTDBSIZE (4000000000) // 4GB table size #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size +#define O2_DEFAULT_DATASIZE (1355U) // in MB +#define O2_DEFAULT_NTRACKS (20000U) +#define O2_DEFAULT_DATADIM (9U) + #define O2_MAXFILES (20000U) #define O2_MAXFILESTR (256U) -#define O2_FILETABLESIZE (O2_MAXFILESTR) -#define O2_TRACKTABLESIZE (sizeof(unsigned)) +#define O2_FILETABLE_ENTRY_SIZE (O2_MAXFILESTR) +#define O2_TRACKTABLE_ENTRY_SIZE (sizeof(unsigned)) #define O2_HEADERSIZE (sizeof(dbTableHeaderT)) #define O2_MEANNUMVECTORS (1000U) #define O2_MAXDIM (1000U) -#define O2_MAXNN (10000U) +#define O2_MAXNN (1000000U) // Flags #define O2_FLAG_L2NORM (0x1U) @@ -75,6 +80,9 @@ #define O2_POINT_QUERY (0x4U) #define O2_SEQUENCE_QUERY (0x8U) #define O2_TRACK_QUERY (0x10U) +#define O2_N_SEQUENCE_QUERY (0x20U) +#define O2_ONE_TO_ONE_N_SEQUENCE_QUERY (0x40U) + // Error Codes #define O2_ERR_KEYNOTFOUND (0xFFFFFF00) @@ -167,7 +175,12 @@ // Flags and parameters unsigned verbosity; // how much do we want to know? - off_t size; // given size (for creation) + + //off_t size; // given size (for creation) + unsigned datasize; // size in MB + unsigned ntracks; + unsigned datadim; + unsigned queryType; // point queries default unsigned pointNN; // how many point NNs ? unsigned trackNN; // how many track NNs ? @@ -227,6 +240,7 @@ void release_lock(int fd); void create(const char* dbName); void drop(); + bool enough_per_file_space_free(); bool enough_data_space_free(off_t size); void insert_data_vectors(off_t offset, void *buffer, size_t size); void insert(const char* dbName, const char* inFile); @@ -277,7 +291,9 @@ powerTableLength(0), \ l2normTableLength(0), \ verbosity(1), \ - size(O2_DEFAULTDBSIZE), \ + datasize(O2_DEFAULT_DATASIZE), \ + ntracks(O2_DEFAULT_NTRACKS), \ + datadim(O2_DEFAULT_DATADIM), \ queryType(O2_POINT_QUERY), \ pointNN(O2_DEFAULT_POINTNN), \ trackNN(O2_DEFAULT_TRACKNN), \
--- a/common.cpp Mon Dec 17 16:44:37 2007 +0000 +++ b/common.cpp Tue Jul 01 09:12:40 2008 +0000 @@ -115,8 +115,8 @@ powerTableLength = dbH->l2normTableOffset - dbH->powerTableOffset; l2normTableLength = dbH->dbSize - dbH->l2normTableOffset; } else { - fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLESIZE); - trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLESIZE); + fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLE_ENTRY_SIZE); + trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLE_ENTRY_SIZE); dataBufLength = ALIGN_PAGE_UP(dbH->length); timesTableLength = ALIGN_PAGE_UP(2*(dbH->length / dbH->dim)); powerTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim);
--- a/create.cpp Mon Dec 17 16:44:37 2007 +0000 +++ b/create.cpp Tue Jul 01 09:12:40 2008 +0000 @@ -24,7 +24,7 @@ dbH = new dbTableHeaderT(); assert(dbH); - unsigned int maxfiles = (unsigned int) rint((double) O2_MAXFILES * (double) size / (double) O2_DEFAULTDBSIZE); + //unsigned int maxfiles = (unsigned int) rint((double) O2_MAXFILES * (double) size / (double) O2_DEFAULTDBSIZE); // Initialize header dbH->magic = O2_MAGIC; @@ -35,17 +35,21 @@ dbH->headerSize = O2_HEADERSIZE; dbH->length = 0; dbH->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE); - dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles); - dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles); - dbH->l2normTableOffset = ALIGN_PAGE_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double)); - dbH->powerTableOffset = ALIGN_PAGE_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double)); - dbH->timesTableOffset = ALIGN_PAGE_DOWN(dbH->powerTableOffset - 2*maxfiles*O2_MEANNUMVECTORS*sizeof(double)); - dbH->dbSize = size; + dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks); + dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLE_ENTRY_SIZE*ntracks); + + off_t databytes = ((off_t) datasize) * 1024 * 1024; + off_t auxbytes = databytes / datadim; + + dbH->timesTableOffset = ALIGN_PAGE_UP(dbH->dataOffset + databytes); + dbH->powerTableOffset = ALIGN_PAGE_UP(dbH->timesTableOffset + 2*auxbytes); + dbH->l2normTableOffset = ALIGN_PAGE_UP(dbH->powerTableOffset + auxbytes); + dbH->dbSize = ALIGN_PAGE_UP(dbH->l2normTableOffset + auxbytes); write(dbfid, dbH, O2_HEADERSIZE); // go to the location corresponding to the last byte - if (lseek (dbfid, size - 1, SEEK_SET) == -1) + if (lseek (dbfid, dbH->dbSize - 1, SEEK_SET) == -1) error("lseek error in db file", "", "lseek"); // write a dummy byte at the last location
--- a/debian/changelog Mon Dec 17 16:44:37 2007 +0000 +++ b/debian/changelog Tue Jul 01 09:12:40 2008 +0000 @@ -1,3 +1,10 @@ +audiodb (1:0.8.9.anteindex-1) unstable; urgency=low + + * Updated to svn version #386 + * version 0.8.9.anteindex (hopefully /just/ before index) + + -- Christophe Rhodes <crhodes@gold.ac.uk> Tue, 24 Jun 2008 14:41:03 +0100 + audiodb (1:0.8.preview-1) unstable; urgency=low * Updated to svn version #325
--- a/dump.cpp Mon Dec 17 16:44:37 2007 +0000 +++ b/dump.cpp Tue Jul 01 09:12:40 2008 +0000 @@ -62,7 +62,7 @@ double *data_buffer; size_t data_buffer_size; for(unsigned k = 0; k < dbH->numFiles; k++) { - fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLESIZE); + fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLE_ENTRY_SIZE); snprintf(fName, 256, "%05d.features", k); if ((ffd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { error("error creating feature file", fName, "open"); @@ -130,7 +130,7 @@ } pos += trackTable[k]; - std::cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << std::endl; + std::cout << fileTable+k*O2_FILETABLE_ENTRY_SIZE << " " << trackTable[k] << std::endl; } FILE *scriptFile; @@ -142,7 +142,12 @@ \n\ if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n\ if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\ -\"${AUDIODB}\" -d \"$1\" -N --size=%d\n", (int) (dbH->dbSize / 1000000)); +\"${AUDIODB}\" -d \"$1\" -N --datasize=%d --ntracks=%d --datadim=%d\n", + (int) ((dbH->timesTableOffset - dbH->dataOffset) / (1024*1024)), + // fileTable entries (char[256]) are bigger than trackTable + // (int), so the granularity of page aligning is finer. + (int) ((dbH->trackTableOffset - dbH->fileTableOffset) / O2_FILETABLE_ENTRY_SIZE), + (int) ceil(((double) (dbH->timesTableOffset - dbH->dataOffset)) / ((double) (dbH->dbSize - dbH->l2normTableOffset)))); if(dbH->flags & O2_FLAG_L2NORM) { fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n"); }
--- a/gengetopt.in Mon Dec 17 16:44:37 2007 +0000 +++ b/gengetopt.in Tue Jul 01 09:12:40 2008 +0000 @@ -5,11 +5,20 @@ option "verbosity" v "level of detail of operational information." int typestr="detail" default="1" optional text "\nDatabase commands are UPPER CASE. Command options are lower case.\n" text "" -section "Database Setup" sectiondesc="All database operations require a database argument." +section "Database Operations" sectiondesc="All database operations require a database argument." option "database" d "database file required by Database commands." string typestr="filename" optional + +section "Database Creation" sectiondesc="Creating a new database file." + option "NEW" N "make a new (initially empty) database." dependon="database" optional -option "size" - "size of database file (in MB)" int dependon="NEW" default="2000" optional +option "size" - "size of database file (in MB)" int dependon="NEW" optional hidden +option "datasize" - "size of data table requested (in MB)" int dependon="NEW" default="1355" optional +option "ntracks" - "capacity of database for tracks" int dependon="NEW" default="20000" optional +option "datadim" - "dimensionality of stored data" int dependon="NEW" default="9" optional + +section "Database Maintenance" sectiondesc="Querying, tweaking and dumping databases." + option "STATUS" S "output database information to stdout." dependon="database" optional option "DUMP" D "output all entries: index key size." dependon="database" optional option "output" - "output directory" string dependon="DUMP" default="audioDB.dump" optional @@ -33,7 +42,7 @@ section "Database Search" sectiondesc="Thse commands control the retrieval behaviour.\n" -option "QUERY" Q "content-based search on --database using --features as a query. Optionally restrict the search to those tracks identified in a --keyList." values="point","track","sequence" typestr="searchtype" dependon="database" dependon="features" optional +option "QUERY" Q "content-based search on --database using --features as a query. Optionally restrict the search to those tracks identified in a --keyList." values="point","track","sequence","nsequence","onetoonensequence" typestr="searchtype" dependon="database" dependon="features" optional option "qpoint" p "ordinal position of query start point in --features file." int typestr="position" default="0" optional option "exhaustive" e "exhaustive search: iterate through all query vectors in search. Overrides --qpoint." flag off optional hidden option "pointnn" n "number of point nearest neighbours to use in retrieval." int typestr="numpoints" default="10" optional
--- a/insert.cpp Mon Dec 17 16:44:37 2007 +0000 +++ b/insert.cpp Tue Jul 01 09:12:40 2008 +0000 @@ -1,5 +1,15 @@ #include "audioDB.h" +bool audioDB::enough_per_file_space_free() { + unsigned int fmaxfiles, tmaxfiles; + unsigned int maxfiles; + + fmaxfiles = fileTableLength / O2_FILETABLE_ENTRY_SIZE; + tmaxfiles = trackTableLength / O2_TRACKTABLE_ENTRY_SIZE; + maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles; + return(dbH->numFiles < maxfiles); +} + bool audioDB::enough_data_space_free(off_t size) { return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size); } @@ -19,6 +29,10 @@ if(!usingPower && (dbH->flags & O2_FLAG_POWER)) error("Must use power with power-enabled database", dbName); + if(!enough_per_file_space_free()) { + error("Insert failed: no more room for metadata", inFile); + } + if(!enough_data_space_free(statbuf.st_size - sizeof(int))) { error("Insert failed: no more room in database", inFile); } @@ -28,7 +42,7 @@ // Linear scan of filenames check for pre-existing feature unsigned alreadyInserted=0; for(unsigned k=0; k<dbH->numFiles; k++) - if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key)+1)==0){ + if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key)+1)==0){ alreadyInserted=1; break; } @@ -50,7 +64,7 @@ return; } - strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key)); + strncpy(fileTable + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, key, strlen(key)); off_t insertoffset = dbH->length;// Store current state @@ -189,26 +203,36 @@ error("Must use power with power-enabled database", dbName); unsigned totalVectors=0; - char *thisKey = new char[MAXSTR]; char *thisFile = new char[MAXSTR]; + char *thisKey = 0; + if (key && (key != inFile)) { + thisKey = new char[MAXSTR]; + } char *thisTimesFileName = new char[MAXSTR]; char *thisPowerFileName = new char[MAXSTR]; do{ filesIn->getline(thisFile,MAXSTR); - if(key && key!=inFile) + if(key && key!=inFile) { keysIn->getline(thisKey,MAXSTR); - else + } else { thisKey = thisFile; - if(usingTimes) - timesFile->getline(thisTimesFileName,MAXSTR); - if(usingPower) + } + if(usingTimes) { + timesFile->getline(thisTimesFileName,MAXSTR); + } + if(usingPower) { powerFile->getline(thisPowerFileName, MAXSTR); + } - if(filesIn->eof()) + if(filesIn->eof()) { break; + } + initInputFile(thisFile); - initInputFile(thisFile); + if(!enough_per_file_space_free()) { + error("batchinsert failed: no more room for metadata", thisFile); + } if(!enough_data_space_free(statbuf.st_size - sizeof(int))) { error("batchinsert failed: no more room in database", thisFile); @@ -218,7 +242,7 @@ unsigned alreadyInserted=0; for(unsigned k=0; k<dbH->numFiles; k++) - if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey)+1)==0){ + if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, thisKey, strlen(thisKey)+1)==0){ alreadyInserted=1; break; } @@ -267,7 +291,7 @@ close(thispowerfd); } } - strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); + strncpy(fileTable + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, thisKey, strlen(thisKey)); off_t insertoffset = dbH->length;// Store current state @@ -298,7 +322,17 @@ } while(!filesIn->eof()); VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double))); + + delete [] thisPowerFileName; + if(key && (key != inFile)) { + delete [] thisKey; + } + delete [] thisFile; + delete [] thisTimesFileName; + delete filesIn; + delete keysIn; + // Report status status(dbName); }
--- a/query.cpp Mon Dec 17 16:44:37 2007 +0000 +++ b/query.cpp Tue Jul 01 09:12:40 2008 +0000 @@ -37,6 +37,20 @@ r = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles); } break; + case O2_N_SEQUENCE_QUERY : + if(radius == 0) { + r = new trackSequenceQueryNNReporter<std::less < NNresult > >(pointNN, trackNN, dbH->numFiles); + } else { + r = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles); + } + break; + case O2_ONE_TO_ONE_N_SEQUENCE_QUERY : + if(radius == 0) { + error("query-type not yet supported"); + } else { + r = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, dbH->numFiles); + } + break; default: error("unrecognized queryType in query()"); } @@ -48,7 +62,7 @@ // return ordinal position of key in keyTable unsigned audioDB::getKeyPos(char* key){ for(unsigned k=0; k<dbH->numFiles; k++) - if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) + if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key))==0) return k; error("Key not found",key); return O2_ERR_KEYNOTFOUND;
--- a/reporter.h Mon Dec 17 16:44:37 2007 +0000 +++ b/reporter.h Tue Jul 01 09:12:40 2008 +0000 @@ -1,5 +1,6 @@ #include <utility> #include <queue> +#include <deque> #include <set> #include <functional> @@ -88,7 +89,7 @@ if(adbQueryResponse==0) { for(rit = v.rbegin(); rit < v.rend(); rit++) { r = *rit; - std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " "; + std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " "; std::cout << r.dist << " " << r.qpos << " " << r.spos << std::endl; } } else { @@ -107,7 +108,7 @@ adbQueryResponse->result.Dist[k] = r.dist; adbQueryResponse->result.Qpos[k] = r.qpos; adbQueryResponse->result.Spos[k] = r.spos; - snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLESIZE); + snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE); } } } @@ -118,7 +119,7 @@ ~trackAveragingReporter(); void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist); void report(char *fileTable, adb__queryResponse *adbQueryResponse); - private: + protected: unsigned int pointNN; unsigned int trackNN; unsigned int numFiles; @@ -189,7 +190,7 @@ if(adbQueryResponse==0) { for(rit = v.rbegin(); rit < v.rend(); rit++) { r = *rit; - std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " "; + std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " "; std::cout << r.dist << " " << r.qpos << " " << r.spos << std::endl; } } else { @@ -208,18 +209,20 @@ adbQueryResponse->result.Dist[k] = r.dist; adbQueryResponse->result.Qpos[k] = r.qpos; adbQueryResponse->result.Spos[k] = r.spos; - snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLESIZE); + snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE); } } } +// track Sequence Query Radius Reporter +// only return tracks and retrieved point counts class trackSequenceQueryRadReporter : public Reporter { public: trackSequenceQueryRadReporter(unsigned int trackNN, unsigned int numFiles); ~trackSequenceQueryRadReporter(); void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist); void report(char *fileTable, adb__queryResponse *adbQueryResponse); -private: + protected: unsigned int trackNN; unsigned int numFiles; std::set<std::pair<unsigned int, unsigned int> > *set; @@ -242,11 +245,11 @@ void trackSequenceQueryRadReporter::add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) { std::set<std::pair<unsigned int, unsigned int> >::iterator it; - std::pair<unsigned int, unsigned int> pair = std::make_pair(trackID, qpos); + std::pair<unsigned int, unsigned int> pair = std::make_pair(trackID, qpos); // only count this once it = set->find(pair); if (it == set->end()) { set->insert(pair); - count[trackID]++; + count[trackID]++; // only count if <tackID,qpos> pair is unique } } @@ -279,9 +282,297 @@ if(adbQueryResponse==0) { for(rit = v.rbegin(); rit < v.rend(); rit++) { r = *rit; - std::cout << fileTable + r.trackID*O2_FILETABLESIZE << " " << r.count << std::endl; + std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " " << r.count << std::endl; } } else { // FIXME } } + +// Another type of trackAveragingReporter that reports all pointNN nearest neighbours +template <class T> class trackSequenceQueryNNReporter : public trackAveragingReporter<T> { + protected: + using trackAveragingReporter<T>::numFiles; + using trackAveragingReporter<T>::queues; + using trackAveragingReporter<T>::trackNN; + using trackAveragingReporter<T>::pointNN; + public: + trackSequenceQueryNNReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles); + void report(char *fileTable, adb__queryResponse *adbQueryResponse); +}; + +template <class T> trackSequenceQueryNNReporter<T>::trackSequenceQueryNNReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles) +:trackAveragingReporter<T>(pointNN, trackNN, numFiles){} + +template <class T> void trackSequenceQueryNNReporter<T>::report(char *fileTable, adb__queryResponse *adbQueryResponse) { + std::priority_queue < NNresult, std::vector< NNresult>, T> result; + std::priority_queue< NNresult, std::vector< NNresult>, std::less<NNresult> > *point_queues = new std::priority_queue< NNresult, std::vector< NNresult>, std::less<NNresult> >[numFiles]; + + for (int i = numFiles-1; i >= 0; i--) { + unsigned int size = queues[i].size(); + if (size > 0) { + NNresult r; + double dist = 0; + NNresult oldr = queues[i].top(); + for (unsigned int j = 0; j < size; j++) { + r = queues[i].top(); + dist += r.dist; + point_queues[i].push(r); + queues[i].pop(); + if (r.dist == oldr.dist) { + r.qpos = oldr.qpos; + r.spos = oldr.spos; + } else { + oldr = r; + } + } + dist /= size; + r.dist = dist; // trackID, qpos and spos are magically right already. + result.push(r); + if (result.size() > trackNN) { + result.pop(); + } + } + } + + NNresult r; + std::vector<NNresult> v; + unsigned int size = result.size(); + for(unsigned int k = 0; k < size; k++) { + r = result.top(); + v.push_back(r); + result.pop(); + } + std::vector<NNresult>::reverse_iterator rit; + std::priority_queue< NNresult, std::vector< NNresult>, std::greater<NNresult> > point_queue; + + if(adbQueryResponse==0) { + for(rit = v.rbegin(); rit < v.rend(); rit++) { + r = *rit; + std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " " << r.dist << std::endl; + unsigned int qsize = point_queues[r.trackID].size(); + // Reverse the order of the points stored in point_queues + for(unsigned int k=0; k < qsize; k++){ + point_queue.push( point_queues[r.trackID].top() ); + point_queues[r.trackID].pop(); + } + + for(unsigned int k = 0; k < qsize; k++) { + NNresult rk = point_queue.top(); + std::cout << rk.dist << " " << rk.qpos << " " << rk.spos << std::endl; + point_queue.pop(); + } + } + } else { + adbQueryResponse->result.__sizeRlist=size; + adbQueryResponse->result.__sizeDist=size; + adbQueryResponse->result.__sizeQpos=size; + adbQueryResponse->result.__sizeSpos=size; + adbQueryResponse->result.Rlist= new char*[size]; + adbQueryResponse->result.Dist = new double[size]; + adbQueryResponse->result.Qpos = new unsigned int[size]; + adbQueryResponse->result.Spos = new unsigned int[size]; + unsigned int k = 0; + for(rit = v.rbegin(); rit < v.rend(); rit++, k++) { + r = *rit; + adbQueryResponse->result.Rlist[k] = new char[O2_MAXFILESTR]; + adbQueryResponse->result.Dist[k] = r.dist; + adbQueryResponse->result.Qpos[k] = r.qpos; + adbQueryResponse->result.Spos[k] = r.spos; + snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE); + } + } + // clean up + delete[] point_queues; +} + + +// track Sequence Query Radius NN Reporter +// retrieve tracks ordered by query-point matches (one per track per query point) +// +// as well as sorted n-NN points per retrieved track +class trackSequenceQueryRadNNReporter : public Reporter { +public: + trackSequenceQueryRadNNReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles); + ~trackSequenceQueryRadNNReporter(); + void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist); + void report(char *fileTable, adb__queryResponse *adbQueryResponse); + protected: + unsigned int pointNN; + unsigned int trackNN; + unsigned int numFiles; + std::set< NNresult > *set; + std::priority_queue< NNresult, std::vector< NNresult>, std::less<NNresult> > *point_queues; + unsigned int *count; +}; + +trackSequenceQueryRadNNReporter::trackSequenceQueryRadNNReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles): +pointNN(pointNN), trackNN(trackNN), numFiles(numFiles) { + // Where to count Radius track matches (one-to-one) + set = new std::set< NNresult >; + // Where to insert individual point matches (one-to-many) + point_queues = new std::priority_queue< NNresult, std::vector< NNresult>, std::less<NNresult> >[numFiles]; + + count = new unsigned int[numFiles]; + for (unsigned i = 0; i < numFiles; i++) { + count[i] = 0; + } +} + +trackSequenceQueryRadNNReporter::~trackSequenceQueryRadNNReporter() { + delete set; + delete [] count; +} + +void trackSequenceQueryRadNNReporter::add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) { + std::set< NNresult >::iterator it; + NNresult r; + r.trackID = trackID; + r.qpos = qpos; + r.dist = dist; + r.spos = spos; + + // Record all matching points (within radius) + if (!isnan(dist)) { + point_queues[trackID].push(r); + if(point_queues[trackID].size() > pointNN) + point_queues[trackID].pop(); + } + + // Record counts of <trackID,qpos> pairs + it = set->find(r); + if (it == set->end()) { + set->insert(r); + count[trackID]++; + } +} + +void trackSequenceQueryRadNNReporter::report(char *fileTable, adb__queryResponse *adbQueryResponse) { + std::priority_queue < Radresult > result; + // KLUDGE: doing this backwards in an attempt to get the same + // tiebreak behaviour as before. + for (int i = numFiles-1; i >= 0; i--) { + Radresult r; + r.trackID = i; + r.count = count[i]; + if(r.count > 0) { + result.push(r); + if (result.size() > trackNN) { + result.pop(); + } + } + } + + Radresult r; + std::vector<Radresult> v; + unsigned int size = result.size(); + for(unsigned int k = 0; k < size; k++) { + r = result.top(); + v.push_back(r); + result.pop(); + } + + + // Traverse tracks in descending order of count cardinality + std::vector<Radresult>::reverse_iterator rit; + std::priority_queue< NNresult, std::vector< NNresult>, std::greater<NNresult> > point_queue; + + if(adbQueryResponse==0) { + for(rit = v.rbegin(); rit < v.rend(); rit++) { + r = *rit; + std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " " << r.count << std::endl; + + // Reverse the order of the points stored in point_queues + unsigned int qsize=point_queues[r.trackID].size(); + for(unsigned int k=0; k < qsize; k++){ + point_queue.push(point_queues[r.trackID].top()); + point_queues[r.trackID].pop(); + } + + for(unsigned int k=0; k < qsize; k++){ + NNresult rk = point_queue.top(); + std::cout << rk.dist << " " << rk.qpos << " " << rk.spos << std::endl; + point_queue.pop(); + } + } + } else { + // FIXME + } + delete[] point_queues; +} + + +/********** ONE-TO-ONE REPORTERS *****************/ + +// track Sequence Query Radius NN Reporter One-to-One +// for each query point find the single best matching target point in all database +// report qpos, spos and trackID +class trackSequenceQueryRadNNReporterOneToOne : public Reporter { +public: + trackSequenceQueryRadNNReporterOneToOne(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles); + ~trackSequenceQueryRadNNReporterOneToOne(); + void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist); + void report(char *fileTable, adb__queryResponse *adbQueryResponse); + protected: + unsigned int pointNN; + unsigned int trackNN; + unsigned int numFiles; + std::set< NNresult > *set; + std::vector< NNresult> *point_queue; + unsigned int *count; + +}; + +trackSequenceQueryRadNNReporterOneToOne::trackSequenceQueryRadNNReporterOneToOne(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles): +pointNN(pointNN), trackNN(trackNN), numFiles(numFiles) { + // Where to count Radius track matches (one-to-one) + set = new std::set< NNresult >; + // Where to insert individual point matches (one-to-many) + point_queue = new std::vector< NNresult >; + + count = new unsigned int[numFiles]; + for (unsigned i = 0; i < numFiles; i++) { + count[i] = 0; + } +} + +trackSequenceQueryRadNNReporterOneToOne::~trackSequenceQueryRadNNReporterOneToOne() { + delete set; + delete [] count; +} + +void trackSequenceQueryRadNNReporterOneToOne::add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) { + std::set< NNresult >::iterator it; + NNresult r; + + r.qpos = qpos; + r.trackID = trackID; + r.spos = spos; + r.dist = dist; + + if(point_queue->size() < r.qpos + 1){ + point_queue->resize( r.qpos + 1 ); + (*point_queue)[r.qpos].dist = 1e6; + } + + if (r.dist < (*point_queue)[r.qpos].dist) + (*point_queue)[r.qpos] = r; + +} + +void trackSequenceQueryRadNNReporterOneToOne::report(char *fileTable, adb__queryResponse *adbQueryResponse) { + if(adbQueryResponse==0) { + std::vector< NNresult >::iterator vit; + NNresult rk; + for( vit = point_queue->begin() ; vit < point_queue->end() ; vit++ ){ + rk = *vit; + std::cout << rk.dist << " " + << rk.qpos << " " + << rk.spos << " " + << fileTable + rk.trackID*O2_FILETABLE_ENTRY_SIZE + << std::endl; + } + } else { + // FIXME + } +}
--- a/tests/0001/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0001/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0002/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0002/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0003/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0003/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0004/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0004/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0006/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0006/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0007/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0007/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0008/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0008/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0009/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0009/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0010/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0010/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0011/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0011/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0012/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0012/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0014/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0014/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0016/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0016/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0017/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0017/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0018/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0018/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0019/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0019/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0020/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0020/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0021/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0021/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0022/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0022/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0023/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0023/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0024/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0024/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0025/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0025/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0026/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0026/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0027/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0027/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0028/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0028/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0029/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0029/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0030/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0030/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0031/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0031/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0032/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0032/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0033/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0033/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0034/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0034/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/0035/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/0035/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0036/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -0,0 +1,78 @@ +#! /bin/bash + +. ../test-utils.sh + +if [ -f testdb ]; then rm -f testdb; fi + +${AUDIODB} -d testdb -N + +intstring 2 > testfeature01 +floatstring 0 1 >> testfeature01 +floatstring 1 0 >> testfeature01 +intstring 2 > testfeature10 +floatstring 1 0 >> testfeature10 +floatstring 0 1 >> testfeature10 + +cat > testfeaturefiles <<EOF +testfeature01 +testfeature10 +EOF + +${AUDIODB} -d testdb -B -F testfeaturefiles + +# sequence queries require L2NORM +${AUDIODB} -d testdb -L + +echo "query point (0.0,0.5)" +intstring 2 > testquery +floatstring 0 0.5 >> testquery + +${AUDIODB} -d testdb -Q nsequence -l 1 -f testquery > testoutput +echo testfeature01 1 > test-expected-output +echo 0 0 0 >> test-expected-output +echo 2 0 1 >> test-expected-output +echo testfeature10 1 >> test-expected-output +echo 0 0 1 >> test-expected-output +echo 2 0 0 >> test-expected-output +cmp testoutput test-expected-output + +${AUDIODB} -d testdb -Q nsequence -l 1 -f testquery -n 2 > testoutput +cmp testoutput test-expected-output + +${AUDIODB} -d testdb -Q nsequence -l 1 -f testquery -n 5 > testoutput +cmp testoutput test-expected-output + +${AUDIODB} -d testdb -Q nsequence -l 1 -f testquery -n 1 > testoutput +echo testfeature01 0 > test-expected-output +echo 0 0 0 >> test-expected-output +echo testfeature10 0 >> test-expected-output +echo 0 0 1 >> test-expected-output +cmp testoutput test-expected-output + +echo "query point (0.5,0.0)" +intstring 2 > testquery +floatstring 0.5 0 >> testquery + +${AUDIODB} -d testdb -Q nsequence -l 1 -f testquery > testoutput +echo testfeature01 1 > test-expected-output +echo 0 0 1 >> test-expected-output +echo 2 0 0 >> test-expected-output +echo testfeature10 1 >> test-expected-output +echo 0 0 0 >> test-expected-output +echo 2 0 1 >> test-expected-output +cmp testoutput test-expected-output + +${AUDIODB} -d testdb -Q nsequence -l 1 -f testquery -n 2 > testoutput +cmp testoutput test-expected-output + +${AUDIODB} -d testdb -Q nsequence -l 1 -f testquery -n 5 > testoutput +cmp testoutput test-expected-output + +${AUDIODB} -d testdb -Q nsequence -l 1 -f testquery -n 1 > testoutput +echo testfeature01 0 > test-expected-output +echo 0 0 1 >> test-expected-output +echo testfeature10 0 >> test-expected-output +echo 0 0 0 >> test-expected-output +cmp testoutput test-expected-output + +exit 104
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0036/short-description Tue Jul 01 09:12:40 2008 +0000 @@ -0,0 +1,1 @@ +nsequence search \ No newline at end of file
--- a/tests/9000/run-test.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/9000/run-test.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,4 +1,4 @@ -#! /bin/sh +#! /bin/bash . ../test-utils.sh
--- a/tests/run-tests.sh Mon Dec 17 16:44:37 2007 +0000 +++ b/tests/run-tests.sh Tue Jul 01 09:12:40 2008 +0000 @@ -1,12 +1,12 @@ -#! /bin/sh +#! /bin/bash AUDIODB=../../${EXECUTABLE:-audioDB} export AUDIODB -if [ -x ${AUDIODB:3} ]; then +if [ -x ${AUDIODB#../} ]; then : else - echo Cannot execute audioDB: ${AUDIODB:3} + echo Cannot execute audioDB: ${AUDIODB#../} exit 1 fi @@ -24,7 +24,7 @@ awk '{ printf(" (%s)",$0) }' < ${file}/short-description fi echo -n : - (cd ${file} && sh ./run-test.sh > test.out 2> test.err) + (cd ${file} && /bin/bash ./run-test.sh > test.out 2> test.err) EXIT_STATUS=$? if [ ${EXIT_STATUS} -eq 14 ]; then echo " n/a."