Mercurial > hg > audiodb
diff query.cpp @ 204:2ea1908707c7 refactoring
Filewise refactor.
Break apart huge monolithic audioDB.cpp file into seven broadly
independent portions:
* SOAP
* DB creation
* insertion
* query
* dump
* common functionality
* constructor functions
Remove the "using namespace std" from the header file, though that
wasn't actually a problem: the problem in question is solved by
including adb.nsmap in only soap.cpp.
Makefile improvements.
author | mas01cr |
---|---|
date | Wed, 28 Nov 2007 15:10:28 +0000 |
parents | |
children | 3c7c8b84e4f3 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/query.cpp Wed Nov 28 15:10:28 2007 +0000 @@ -0,0 +1,1559 @@ +#include "audioDB.h" + +bool audioDB::powers_acceptable(double p1, double p2) { + if (use_absolute_threshold) { + if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) { + return false; + } + } + if (use_relative_threshold) { + if (fabs(p1-p2) > fabs(relative_threshold)) { + return false; + } + } + return true; +} + +void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ + switch(queryType){ + case O2_POINT_QUERY: + pointQuery(dbName, inFile, adbQueryResponse); + break; + case O2_SEQUENCE_QUERY: + if(radius==0) + trackSequenceQueryNN(dbName, inFile, adbQueryResponse); + else + trackSequenceQueryRad(dbName, inFile, adbQueryResponse); + break; + case O2_TRACK_QUERY: + trackPointQuery(dbName, inFile, adbQueryResponse); + break; + default: + error("unrecognized queryType in query()"); + + } +} + +//return ordinal position of key in keyTable +unsigned audioDB::getKeyPos(char* key){ + for(unsigned k=0; k<dbH->numFiles; k++) + if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) + return k; + error("Key not found",key); + return O2_ERR_KEYNOTFOUND; +} + +// Basic point query engine +void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { + + initTables(dbName, inFile); + + // For each input vector, find the closest pointNN matching output vectors and report + // we use stdout in this stub version + unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); + + double* query = (double*)(indata+sizeof(int)); + CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); + double* data = dataBuf; + double* queryCopy = 0; + + if( dbH->flags & O2_FLAG_L2NORM ){ + // Make a copy of the query + queryCopy = new double[numVectors*dbH->dim]; + qNorm = new double[numVectors]; + assert(queryCopy&&qNorm); + memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); + unitNorm(queryCopy, dbH->dim, numVectors, qNorm); + query = queryCopy; + } + + // Make temporary dynamic memory for results + assert(pointNN>0 && pointNN<=O2_MAXNN); + double distances[pointNN]; + unsigned qIndexes[pointNN]; + unsigned sIndexes[pointNN]; + for(unsigned k=0; k<pointNN; k++){ + distances[k]=-DBL_MAX; + qIndexes[k]=~0; + sIndexes[k]=~0; + } + + unsigned j=numVectors; + unsigned k,l,n; + double thisDist; + + unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double)); + double meanQdur = 0; + double *timesdata = 0; + double *querydurs = 0; + double *dbdurs = 0; + + if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ + std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl; + usingTimes=0; + } + + else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) + std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl; + + else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ + timesdata = new double[2*numVectors]; + querydurs = new double[numVectors]; + insertTimeStamps(numVectors, timesFile, timesdata); + // Calculate durations of points + for(k=0; k<numVectors-1; k++){ + querydurs[k]=timesdata[2*k+1]-timesdata[2*k]; + meanQdur+=querydurs[k]; + } + meanQdur/=k; + // Individual exhaustive timepoint durations + dbdurs = new double[totalVecs]; + for(k=0; k<totalVecs-1; k++) { + dbdurs[k]=timesTable[2*k+1]-timesTable[2*k]; + } + } + + if(usingQueryPoint) + if(queryPoint>numVectors-1) + error("queryPoint > numVectors in query"); + else{ + if(verbosity>1) { + std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush(); + } + query=query+queryPoint*dbH->dim; + numVectors=queryPoint+1; + j=1; + } + + gettimeofday(&tv1, NULL); + while(j--){ // query + data=dataBuf; + k=totalVecs; // number of database vectors + while(k--){ // database + thisDist=0; + l=dbH->dim; + double* q=query; + while(l--) + thisDist+=*q++**data++; + if(!usingTimes || + (usingTimes + && fabs(dbdurs[totalVecs-k-1]-querydurs[numVectors-j-1])<querydurs[numVectors-j-1]*timesTol)){ + n=pointNN; + while(n--){ + if(thisDist>=distances[n]){ + if((n==0 || thisDist<=distances[n-1])){ + // Copy all values above up the queue + for( l=pointNN-1 ; l >= n+1 ; l--){ + distances[l]=distances[l-1]; + qIndexes[l]=qIndexes[l-1]; + sIndexes[l]=sIndexes[l-1]; + } + distances[n]=thisDist; + qIndexes[n]=numVectors-j-1; + sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1; + break; + } + } + else + break; + } + } + } + // Move query pointer to next query point + query+=dbH->dim; + } + + gettimeofday(&tv2, NULL); + if(verbosity>1) { + std::cerr << std::endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl; + } + + if(adbQueryResponse==0){ + // Output answer + // Loop over nearest neighbours + for(k=0; k < pointNN; k++){ + // Scan for key + unsigned cumTrack=0; + for(l=0 ; l<dbH->numFiles; l++){ + cumTrack+=trackTable[l]; + if(sIndexes[k]<cumTrack){ + std::cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " " + << sIndexes[k]+trackTable[l]-cumTrack << std::endl; + break; + } + } + } + } + else{ // Process Web Services Query + int listLen; + for(k = 0; k < pointNN; k++) { + if(distances[k] == -DBL_MAX) + break; + } + listLen = k; + + adbQueryResponse->result.__sizeRlist=listLen; + adbQueryResponse->result.__sizeDist=listLen; + adbQueryResponse->result.__sizeQpos=listLen; + adbQueryResponse->result.__sizeSpos=listLen; + adbQueryResponse->result.Rlist= new char*[listLen]; + adbQueryResponse->result.Dist = new double[listLen]; + adbQueryResponse->result.Qpos = new unsigned int[listLen]; + adbQueryResponse->result.Spos = new unsigned int[listLen]; + for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ + adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; + adbQueryResponse->result.Dist[k]=distances[k]; + adbQueryResponse->result.Qpos[k]=qIndexes[k]; + unsigned cumTrack=0; + for(l=0 ; l<dbH->numFiles; l++){ + cumTrack+=trackTable[l]; + if(sIndexes[k]<cumTrack){ + sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); + break; + } + } + adbQueryResponse->result.Spos[k]=sIndexes[k]+trackTable[l]-cumTrack; + } + } + + // Clean up + if(queryCopy) + delete queryCopy; + if(qNorm) + delete qNorm; + if(timesdata) + delete[] timesdata; + if(querydurs) + delete[] querydurs; + if(dbdurs) + delete dbdurs; +} + +// trackPointQuery +// return the trackNN closest tracks to the query track +// uses average of pointNN points per track +void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { + initTables(dbName, inFile); + + // For each input vector, find the closest pointNN matching output vectors and report + unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); + double* query = (double*)(indata+sizeof(int)); + double* data; + double* queryCopy = 0; + + if( dbH->flags & O2_FLAG_L2NORM ){ + // Make a copy of the query + queryCopy = new double[numVectors*dbH->dim]; + qNorm = new double[numVectors]; + assert(queryCopy&&qNorm); + memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); + unitNorm(queryCopy, dbH->dim, numVectors, qNorm); + query = queryCopy; + } + + assert(pointNN>0 && pointNN<=O2_MAXNN); + assert(trackNN>0 && trackNN<=O2_MAXNN); + + // Make temporary dynamic memory for results + double trackDistances[trackNN]; + unsigned trackIDs[trackNN]; + unsigned trackQIndexes[trackNN]; + unsigned trackSIndexes[trackNN]; + + double distances[pointNN]; + unsigned qIndexes[pointNN]; + unsigned sIndexes[pointNN]; + + unsigned j=numVectors; // number of query points + unsigned k,l,n, track, trackOffset=0, processedTracks=0; + double thisDist; + + for(k=0; k<pointNN; k++){ + distances[k]=-DBL_MAX; + qIndexes[k]=~0; + sIndexes[k]=~0; + } + + for(k=0; k<trackNN; k++){ + trackDistances[k]=-DBL_MAX; + trackQIndexes[k]=~0; + trackSIndexes[k]=~0; + trackIDs[k]=~0; + } + + double meanQdur = 0; + double *timesdata = 0; + double *querydurs = 0; + double *meanDBdur = 0; + + if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ + std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl; + usingTimes=0; + } + + else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) + std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl; + + else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ + timesdata = new double[2*numVectors]; + querydurs = new double[numVectors]; + insertTimeStamps(numVectors, timesFile, timesdata); + // Calculate durations of points + for(k=0; k<numVectors-1; k++) { + querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; + meanQdur += querydurs[k]; + } + meanQdur/=k; + meanDBdur = new double[dbH->numFiles]; + for(k=0; k<dbH->numFiles; k++){ + meanDBdur[k]=0.0; + for(j=0; j<trackTable[k]-1 ; j++) { + meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j]; + } + meanDBdur[k]/=j; + } + } + + if(usingQueryPoint) + if(queryPoint>numVectors-1) + error("queryPoint > numVectors in query"); + else{ + if(verbosity>1) { + std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush(); + } + query=query+queryPoint*dbH->dim; + numVectors=queryPoint+1; + } + + // build track offset table + off_t *trackOffsetTable = new off_t[dbH->numFiles]; + unsigned cumTrack=0; + off_t trackIndexOffset; + for(k=0; k<dbH->numFiles;k++){ + trackOffsetTable[k]=cumTrack; + cumTrack+=trackTable[k]*dbH->dim; + } + + char nextKey[MAXSTR]; + + gettimeofday(&tv1, NULL); + + size_t data_buffer_size = 0; + double *data_buffer = 0; + lseek(dbfid, dbH->dataOffset, SEEK_SET); + + for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ + + trackOffset = trackOffsetTable[track]; // numDoubles offset + + // get trackID from file if using a control file + if(trackFile) { + trackFile->getline(nextKey,MAXSTR); + if(!trackFile->eof()) { + track = getKeyPos(nextKey); + trackOffset = trackOffsetTable[track]; + lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); + } else { + break; + } + } + + trackIndexOffset=trackOffset/dbH->dim; // numVectors offset + + if(verbosity>7) { + std::cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";std::cerr.flush(); + } + + if(dbH->flags & O2_FLAG_L2NORM) + usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy; + else + usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); + if(usingQueryPoint) + j=1; + else + j=numVectors; + + if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) { + if(data_buffer) { + free(data_buffer); + } + { + data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; + void *tmp = malloc(data_buffer_size); + if (tmp == NULL) { + error("error allocating data buffer"); + } + data_buffer = (double *) tmp; + } + } + + read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); + + while(j--){ + k=trackTable[track]; // number of vectors in track + data=data_buffer; // data for track + while(k--){ + thisDist=0; + l=dbH->dim; + double* q=query; + while(l--) + thisDist+=*q++**data++; + if(!usingTimes || + (usingTimes + && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){ + n=pointNN; + while(n--){ + if(thisDist>=distances[n]){ + if((n==0 || thisDist<=distances[n-1])){ + // Copy all values above up the queue + for( l=pointNN-1 ; l > n ; l--){ + distances[l]=distances[l-1]; + qIndexes[l]=qIndexes[l-1]; + sIndexes[l]=sIndexes[l-1]; + } + distances[n]=thisDist; + qIndexes[n]=numVectors-j-1; + sIndexes[n]=trackTable[track]-k-1; + break; + } + } + else + break; + } + } + } // track + // Move query pointer to next query point + query+=dbH->dim; + } // query + // Take the average of this track's distance + // Test the track distances + thisDist=0; + for (n = 0; n < pointNN; n++) { + if (distances[n] == -DBL_MAX) break; + thisDist += distances[n]; + } + thisDist /= n; + + n=trackNN; + while(n--){ + if(thisDist>=trackDistances[n]){ + if((n==0 || thisDist<=trackDistances[n-1])){ + // Copy all values above up the queue + for( l=trackNN-1 ; l > n ; l--){ + trackDistances[l]=trackDistances[l-1]; + trackQIndexes[l]=trackQIndexes[l-1]; + trackSIndexes[l]=trackSIndexes[l-1]; + trackIDs[l]=trackIDs[l-1]; + } + trackDistances[n]=thisDist; + trackQIndexes[n]=qIndexes[0]; + trackSIndexes[n]=sIndexes[0]; + trackIDs[n]=track; + break; + } + } + else + break; + } + for(unsigned k=0; k<pointNN; k++){ + distances[k]=-DBL_MAX; + qIndexes[k]=~0; + sIndexes[k]=~0; + } + } // tracks + + free(data_buffer); + + gettimeofday(&tv2, NULL); + + if(verbosity>1) { + std::cerr << std::endl << "processed tracks :" << processedTracks + << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl; + } + + if(adbQueryResponse==0){ + if(verbosity>1) { + std::cerr<<std::endl; + } + // Output answer + // Loop over nearest neighbours + for(k=0; k < std::min(trackNN,processedTracks); k++) + std::cout << fileTable+trackIDs[k]*O2_FILETABLESIZE + << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << std::endl; + } + else{ // Process Web Services Query + int listLen = std::min(trackNN, processedTracks); + adbQueryResponse->result.__sizeRlist=listLen; + adbQueryResponse->result.__sizeDist=listLen; + adbQueryResponse->result.__sizeQpos=listLen; + adbQueryResponse->result.__sizeSpos=listLen; + adbQueryResponse->result.Rlist= new char*[listLen]; + adbQueryResponse->result.Dist = new double[listLen]; + adbQueryResponse->result.Qpos = new unsigned int[listLen]; + adbQueryResponse->result.Spos = new unsigned int[listLen]; + for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ + adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; + adbQueryResponse->result.Dist[k]=trackDistances[k]; + adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; + adbQueryResponse->result.Spos[k]=trackSIndexes[k]; + sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); + } + } + + // Clean up + if(trackOffsetTable) + delete trackOffsetTable; + if(queryCopy) + delete queryCopy; + if(qNorm) + delete qNorm; + if(timesdata) + delete[] timesdata; + if(querydurs) + delete[] querydurs; + if(meanDBdur) + delete meanDBdur; +} + +// This is a common pattern in sequence queries: what we are doing is +// taking a window of length seqlen over a buffer of length length, +// and placing the sum of the elements in that window in the first +// element of the window: thus replacing all but the last seqlen +// elements in the buffer the corresponding windowed sum. +void audioDB::sequence_sum(double *buffer, int length, int seqlen) { + double tmp1, tmp2, *ps; + int j, w; + + tmp1 = *buffer; + j = 1; + w = seqlen - 1; + while(w--) { + *buffer += buffer[j++]; + } + ps = buffer + 1; + w = length - seqlen; // +1 - 1 + while(w--) { + tmp2 = *ps; + *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1); + tmp1 = tmp2; + ps++; + } +} + +void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) { + int w = length - seqlen + 1; + while(w--) { + *buffer = sqrt(*buffer); + buffer++; + } +} + +void audioDB::sequence_average(double *buffer, int length, int seqlen) { + int w = length - seqlen + 1; + while(w--) { + *buffer /= seqlen; + buffer++; + } +} + +// k nearest-neighbor (k-NN) search between query and target tracks +// efficient implementation based on matched filter +// assumes normed shingles +// outputs distances of retrieved shingles, max retreived = pointNN shingles per per track +void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ + + initTables(dbName, inFile); + + // For each input vector, find the closest pointNN matching output vectors and report + // we use stdout in this stub version + unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); + double* query = (double*)(indata+sizeof(int)); + double* queryCopy = 0; + + if(!(dbH->flags & O2_FLAG_L2NORM) ) + error("Database must be L2 normed for sequence query","use -L2NORM"); + + if(numVectors<sequenceLength) + error("Query shorter than requested sequence length", "maybe use -l"); + + if(verbosity>1) { + std::cerr << "performing norms ... "; std::cerr.flush(); + } + unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); + + // Make a copy of the query + queryCopy = new double[numVectors*dbH->dim]; + memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); + qNorm = new double[numVectors]; + sNorm = new double[dbVectors]; + assert(qNorm&&sNorm&&queryCopy&&sequenceLength); + unitNorm(queryCopy, dbH->dim, numVectors, qNorm); + query = queryCopy; + + // Make norm measurements relative to sequenceLength + unsigned w = sequenceLength-1; + unsigned i,j; + + // Copy the L2 norm values to core to avoid disk random access later on + memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); + double* qnPtr = qNorm; + double* snPtr = sNorm; + + double *sPower = 0, *qPower = 0; + double *spPtr = 0, *qpPtr = 0; + + if (usingPower) { + if (!(dbH->flags & O2_FLAG_POWER)) { + error("database not power-enabled", dbName); + } + sPower = new double[dbVectors]; + spPtr = sPower; + memcpy(sPower, powerTable, dbVectors * sizeof(double)); + } + + for(i=0; i<dbH->numFiles; i++){ + if(trackTable[i]>=sequenceLength) { + sequence_sum(snPtr, trackTable[i], sequenceLength); + sequence_sqrt(snPtr, trackTable[i], sequenceLength); + + if (usingPower) { + sequence_sum(spPtr, trackTable[i], sequenceLength); + sequence_average(spPtr, trackTable[i], sequenceLength); + } + } + snPtr += trackTable[i]; + if (usingPower) { + spPtr += trackTable[i]; + } + } + + sequence_sum(qnPtr, numVectors, sequenceLength); + sequence_sqrt(qnPtr, numVectors, sequenceLength); + + if (usingPower) { + qPower = new double[numVectors]; + qpPtr = qPower; + if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { + error("error seeking to data", powerFileName, "lseek"); + } + int count = read(powerfd, qPower, numVectors * sizeof(double)); + if (count == -1) { + error("error reading data", powerFileName, "read"); + } + if ((unsigned) count != numVectors * sizeof(double)) { + error("short read", powerFileName); + } + + sequence_sum(qpPtr, numVectors, sequenceLength); + sequence_average(qpPtr, numVectors, sequenceLength); + } + + if(verbosity>1) { + std::cerr << "done." << std::endl; + } + + if(verbosity>1) { + std::cerr << "matching tracks..." << std::endl; + } + + assert(pointNN>0 && pointNN<=O2_MAXNN); + assert(trackNN>0 && trackNN<=O2_MAXNN); + + // Make temporary dynamic memory for results + double trackDistances[trackNN]; + unsigned trackIDs[trackNN]; + unsigned trackQIndexes[trackNN]; + unsigned trackSIndexes[trackNN]; + + double distances[pointNN]; + unsigned qIndexes[pointNN]; + unsigned sIndexes[pointNN]; + + + unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; + double thisDist; + + for(k=0; k<pointNN; k++){ + distances[k]=1.0e6; + qIndexes[k]=~0; + sIndexes[k]=~0; + } + + for(k=0; k<trackNN; k++){ + trackDistances[k]=1.0e6; + trackQIndexes[k]=~0; + trackSIndexes[k]=~0; + trackIDs[k]=~0; + } + + // Timestamp and durations processing + double meanQdur = 0; + double *timesdata = 0; + double *querydurs = 0; + double *meanDBdur = 0; + + if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ + std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl; + usingTimes=0; + } + + else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) + std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl; + + else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ + timesdata = new double[2*numVectors]; + querydurs = new double[numVectors]; + + insertTimeStamps(numVectors, timesFile, timesdata); + // Calculate durations of points + for(k=0; k<numVectors-1; k++) { + querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; + meanQdur += querydurs[k]; + } + meanQdur/=k; + if(verbosity>1) { + std::cerr << "mean query file duration: " << meanQdur << std::endl; + } + meanDBdur = new double[dbH->numFiles]; + assert(meanDBdur); + for(k=0; k<dbH->numFiles; k++){ + meanDBdur[k]=0.0; + for(j=0; j<trackTable[k]-1 ; j++) { + meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j]; + } + meanDBdur[k]/=j; + } + } + + if(usingQueryPoint) + if(queryPoint>numVectors || queryPoint>numVectors-wL+1) + error("queryPoint > numVectors-wL+1 in query"); + else{ + if(verbosity>1) { + std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush(); + } + query = query + queryPoint * dbH->dim; + qnPtr = qnPtr + queryPoint; + if (usingPower) { + qpPtr = qpPtr + queryPoint; + } + numVectors=wL; + } + + double ** D = 0; // Differences query and target + double ** DD = 0; // Matched filter distance + + D = new double*[numVectors]; + assert(D); + DD = new double*[numVectors]; + assert(DD); + + gettimeofday(&tv1, NULL); + unsigned processedTracks = 0; + unsigned successfulTracks=0; + + double* qp; + double* sp; + double* dp; + + // build track offset table + off_t *trackOffsetTable = new off_t[dbH->numFiles]; + unsigned cumTrack=0; + off_t trackIndexOffset; + for(k=0; k<dbH->numFiles;k++){ + trackOffsetTable[k]=cumTrack; + cumTrack+=trackTable[k]*dbH->dim; + } + + char nextKey [MAXSTR]; + + // chi^2 statistics + double sampleCount = 0; + double sampleSum = 0; + double logSampleSum = 0; + double minSample = 1e9; + double maxSample = 0; + + // Track loop + size_t data_buffer_size = 0; + double *data_buffer = 0; + lseek(dbfid, dbH->dataOffset, SEEK_SET); + + for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) { + + trackOffset = trackOffsetTable[track]; // numDoubles offset + + // get trackID from file if using a control file + if(trackFile) { + trackFile->getline(nextKey,MAXSTR); + if(!trackFile->eof()) { + track = getKeyPos(nextKey); + trackOffset = trackOffsetTable[track]; + lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); + } else { + break; + } + } + + trackIndexOffset=trackOffset/dbH->dim; // numVectors offset + + if(sequenceLength<=trackTable[track]){ // test for short sequences + + if(verbosity>7) { + std::cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";std::cerr.flush(); + } + + // Sum products matrix + for(j=0; j<numVectors;j++){ + D[j]=new double[trackTable[track]]; + assert(D[j]); + + } + + // Matched filter matrix + for(j=0; j<numVectors;j++){ + DD[j]=new double[trackTable[track]]; + assert(DD[j]); + } + + if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) { + if(data_buffer) { + free(data_buffer); + } + { + data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; + void *tmp = malloc(data_buffer_size); + if (tmp == NULL) { + error("error allocating data buffer"); + } + data_buffer = (double *) tmp; + } + } + + read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); + + // Dot product + for(j=0; j<numVectors; j++) + for(k=0; k<trackTable[track]; k++){ + qp=query+j*dbH->dim; + sp=data_buffer+k*dbH->dim; + DD[j][k]=0.0; // Initialize matched filter array + dp=&D[j][k]; // point to correlation cell j,k + *dp=0.0; // initialize correlation cell + l=dbH->dim; // size of vectors + while(l--) + *dp+=*qp++**sp++; + } + + // Matched Filter + // HOP SIZE == 1 + double* spd; + if(HOP_SIZE==1){ // HOP_SIZE = shingleHop + for(w=0; w<wL; w++) + for(j=0; j<numVectors-w; j++){ + sp=DD[j]; + spd=D[j+w]+w; + k=trackTable[track]-w; + while(k--) + *sp+++=*spd++; + } + } + + else{ // HOP_SIZE != 1 + for(w=0; w<wL; w++) + for(j=0; j<numVectors-w; j+=HOP_SIZE){ + sp=DD[j]; + spd=D[j+w]+w; + for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){ + *sp+=*spd; + sp+=HOP_SIZE; + spd+=HOP_SIZE; + } + } + } + + if(verbosity>3 && usingTimes) { + std::cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << std::endl; + std::cerr.flush(); + } + + if(!usingTimes || + (usingTimes + && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){ + + if(verbosity>3 && usingTimes) { + std::cerr << "within duration tolerance." << std::endl; + std::cerr.flush(); + } + + // Search for minimum distance by shingles (concatenated vectors) + for(j=0;j<=numVectors-wL;j+=HOP_SIZE) + for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ + thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; + if(verbosity>9) { + std::cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << std::endl; + } + // Gather chi^2 statistics + if(thisDist<minSample) + minSample=thisDist; + else if(thisDist>maxSample) + maxSample=thisDist; + if(thisDist>1e-9){ + sampleCount++; + sampleSum+=thisDist; + logSampleSum+=log(thisDist); + } + + // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); + // Power test + if (usingPower) { + if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) { + thisDist = 1000000.0; + } + } + + // k-NN match algorithm + m=pointNN; + while(m--){ + if(thisDist<=distances[m]) + if(m==0 || thisDist>=distances[m-1]){ + // Shuffle distances up the list + for(l=pointNN-1; l>m; l--){ + distances[l]=distances[l-1]; + qIndexes[l]=qIndexes[l-1]; + sIndexes[l]=sIndexes[l-1]; + } + distances[m]=thisDist; + if(usingQueryPoint) + qIndexes[m]=queryPoint; + else + qIndexes[m]=j; + sIndexes[m]=k; + break; + } + } + } + // Calculate the mean of the N-Best matches + thisDist=0.0; + for(m=0; m<pointNN; m++) { + if (distances[m] == 1000000.0) break; + thisDist+=distances[m]; + } + thisDist/=m; + + // Let's see the distances then... + if(verbosity>3) { + std::cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << std::endl; + } + + + // All the track stuff goes here + n=trackNN; + while(n--){ + if(thisDist<=trackDistances[n]){ + if((n==0 || thisDist>=trackDistances[n-1])){ + // Copy all values above up the queue + for( l=trackNN-1 ; l > n ; l--){ + trackDistances[l]=trackDistances[l-1]; + trackQIndexes[l]=trackQIndexes[l-1]; + trackSIndexes[l]=trackSIndexes[l-1]; + trackIDs[l]=trackIDs[l-1]; + } + trackDistances[n]=thisDist; + trackQIndexes[n]=qIndexes[0]; + trackSIndexes[n]=sIndexes[0]; + successfulTracks++; + trackIDs[n]=track; + break; + } + } + else + break; + } + } // Duration match + + // Clean up current track + if(D!=NULL){ + for(j=0; j<numVectors; j++) + delete[] D[j]; + } + + if(DD!=NULL){ + for(j=0; j<numVectors; j++) + delete[] DD[j]; + } + } + // per-track reset array values + for(unsigned k=0; k<pointNN; k++){ + distances[k]=1.0e6; + qIndexes[k]=~0; + sIndexes[k]=~0; + } + } + + free(data_buffer); + + gettimeofday(&tv2,NULL); + if(verbosity>1) { + std::cerr << std::endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" + << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl; + std::cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum + << " minSample: " << minSample << " maxSample: " << maxSample << std::endl; + } + if(adbQueryResponse==0){ + if(verbosity>1) { + std::cerr<<std::endl; + } + // Output answer + // Loop over nearest neighbours + for(k=0; k < std::min(trackNN,successfulTracks); k++) + std::cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << " " + << trackQIndexes[k] << " " << trackSIndexes[k] << std::endl; + } + else{ // Process Web Services Query + int listLen = std::min(trackNN, processedTracks); + adbQueryResponse->result.__sizeRlist=listLen; + adbQueryResponse->result.__sizeDist=listLen; + adbQueryResponse->result.__sizeQpos=listLen; + adbQueryResponse->result.__sizeSpos=listLen; + adbQueryResponse->result.Rlist= new char*[listLen]; + adbQueryResponse->result.Dist = new double[listLen]; + adbQueryResponse->result.Qpos = new unsigned int[listLen]; + adbQueryResponse->result.Spos = new unsigned int[listLen]; + for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ + adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; + adbQueryResponse->result.Dist[k]=trackDistances[k]; + adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; + adbQueryResponse->result.Spos[k]=trackSIndexes[k]; + sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); + } + } + + // Clean up + if(trackOffsetTable) + delete[] trackOffsetTable; + if(queryCopy) + delete[] queryCopy; + if(qNorm) + delete[] qNorm; + if(sNorm) + delete[] sNorm; + if(qPower) + delete[] qPower; + if(sPower) + delete[] sPower; + if(D) + delete[] D; + if(DD) + delete[] DD; + if(timesdata) + delete[] timesdata; + if(querydurs) + delete[] querydurs; + if(meanDBdur) + delete[] meanDBdur; +} + +// Radius search between query and target tracks +// efficient implementation based on matched filter +// assumes normed shingles +// outputs count of retrieved shingles, max retreived = one shingle per query shingle per track +void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ + + initTables(dbName, inFile); + + // For each input vector, find the closest pointNN matching output vectors and report + // we use stdout in this stub version + unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); + double* query = (double*)(indata+sizeof(int)); + double* queryCopy = 0; + + if(!(dbH->flags & O2_FLAG_L2NORM) ) + error("Database must be L2 normed for sequence query","use -l2norm"); + + if(verbosity>1) { + std::cerr << "performing norms ... "; std::cerr.flush(); + } + unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); + + // Make a copy of the query + queryCopy = new double[numVectors*dbH->dim]; + memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); + qNorm = new double[numVectors]; + sNorm = new double[dbVectors]; + assert(qNorm&&sNorm&&queryCopy&&sequenceLength); + unitNorm(queryCopy, dbH->dim, numVectors, qNorm); + query = queryCopy; + + // Make norm measurements relative to sequenceLength + unsigned w = sequenceLength-1; + unsigned i,j; + + // Copy the L2 norm values to core to avoid disk random access later on + memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); + double* snPtr = sNorm; + double* qnPtr = qNorm; + + double *sPower = 0, *qPower = 0; + double *spPtr = 0, *qpPtr = 0; + + if (usingPower) { + if(!(dbH->flags & O2_FLAG_POWER)) { + error("database not power-enabled", dbName); + } + sPower = new double[dbVectors]; + spPtr = sPower; + memcpy(sPower, powerTable, dbVectors * sizeof(double)); + } + + for(i=0; i<dbH->numFiles; i++){ + if(trackTable[i]>=sequenceLength) { + sequence_sum(snPtr, trackTable[i], sequenceLength); + sequence_sqrt(snPtr, trackTable[i], sequenceLength); + if (usingPower) { + sequence_sum(spPtr, trackTable[i], sequenceLength); + sequence_average(spPtr, trackTable[i], sequenceLength); + } + } + snPtr += trackTable[i]; + if (usingPower) { + spPtr += trackTable[i]; + } + } + + sequence_sum(qnPtr, numVectors, sequenceLength); + sequence_sqrt(qnPtr, numVectors, sequenceLength); + + if (usingPower) { + qPower = new double[numVectors]; + qpPtr = qPower; + if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) { + error("error seeking to data", powerFileName, "lseek"); + } + int count = read(powerfd, qPower, numVectors * sizeof(double)); + if (count == -1) { + error("error reading data", powerFileName, "read"); + } + if ((unsigned) count != numVectors * sizeof(double)) { + error("short read", powerFileName); + } + + sequence_sum(qpPtr, numVectors, sequenceLength); + sequence_average(qpPtr, numVectors, sequenceLength); + } + + if(verbosity>1) { + std::cerr << "done." << std::endl; + } + + if(verbosity>1) { + std::cerr << "matching tracks..." << std::endl; + } + + assert(pointNN>0 && pointNN<=O2_MAXNN); + assert(trackNN>0 && trackNN<=O2_MAXNN); + + // Make temporary dynamic memory for results + double trackDistances[trackNN]; + unsigned trackIDs[trackNN]; + unsigned trackQIndexes[trackNN]; + unsigned trackSIndexes[trackNN]; + + double distances[pointNN]; + unsigned qIndexes[pointNN]; + unsigned sIndexes[pointNN]; + + + unsigned k,l,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; + double thisDist; + + for(k=0; k<pointNN; k++){ + distances[k]=0.0; + qIndexes[k]=~0; + sIndexes[k]=~0; + } + + for(k=0; k<trackNN; k++){ + trackDistances[k]=0.0; + trackQIndexes[k]=~0; + trackSIndexes[k]=~0; + trackIDs[k]=~0; + } + + // Timestamp and durations processing + double meanQdur = 0; + double *timesdata = 0; + double *querydurs = 0; + double *meanDBdur = 0; + + if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ + std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl; + usingTimes=0; + } + + else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) + std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl; + + else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ + timesdata = new double[2*numVectors]; + querydurs = new double[numVectors]; + + insertTimeStamps(numVectors, timesFile, timesdata); + // Calculate durations of points + for(k=0; k<numVectors-1; k++){ + querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; + meanQdur += querydurs[k]; + } + meanQdur/=k; + if(verbosity>1) { + std::cerr << "mean query file duration: " << meanQdur << std::endl; + } + meanDBdur = new double[dbH->numFiles]; + assert(meanDBdur); + for(k=0; k<dbH->numFiles; k++){ + meanDBdur[k]=0.0; + for(j=0; j<trackTable[k]-1 ; j++) { + meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j]; + } + meanDBdur[k]/=j; + } + } + + if(usingQueryPoint) + if(queryPoint>numVectors || queryPoint>numVectors-wL+1) + error("queryPoint > numVectors-wL+1 in query"); + else{ + if(verbosity>1) { + std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush(); + } + query = query + queryPoint*dbH->dim; + qnPtr = qnPtr + queryPoint; + if (usingPower) { + qpPtr = qpPtr + queryPoint; + } + numVectors=wL; + } + + double ** D = 0; // Differences query and target + double ** DD = 0; // Matched filter distance + + D = new double*[numVectors]; + assert(D); + DD = new double*[numVectors]; + assert(DD); + + gettimeofday(&tv1, NULL); + unsigned processedTracks = 0; + unsigned successfulTracks=0; + + double* qp; + double* sp; + double* dp; + + // build track offset table + off_t *trackOffsetTable = new off_t[dbH->numFiles]; + unsigned cumTrack=0; + off_t trackIndexOffset; + for(k=0; k<dbH->numFiles;k++){ + trackOffsetTable[k]=cumTrack; + cumTrack+=trackTable[k]*dbH->dim; + } + + char nextKey [MAXSTR]; + + // chi^2 statistics + double sampleCount = 0; + double sampleSum = 0; + double logSampleSum = 0; + double minSample = 1e9; + double maxSample = 0; + + // Track loop + size_t data_buffer_size = 0; + double *data_buffer = 0; + lseek(dbfid, dbH->dataOffset, SEEK_SET); + + for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ + + trackOffset = trackOffsetTable[track]; // numDoubles offset + + // get trackID from file if using a control file + if(trackFile) { + trackFile->getline(nextKey,MAXSTR); + if(!trackFile->eof()) { + track = getKeyPos(nextKey); + trackOffset = trackOffsetTable[track]; + lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); + } else { + break; + } + } + + trackIndexOffset=trackOffset/dbH->dim; // numVectors offset + + if(sequenceLength<=trackTable[track]){ // test for short sequences + + if(verbosity>7) { + std::cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";std::cerr.flush(); + } + + // Sum products matrix + for(j=0; j<numVectors;j++){ + D[j]=new double[trackTable[track]]; + assert(D[j]); + + } + + // Matched filter matrix + for(j=0; j<numVectors;j++){ + DD[j]=new double[trackTable[track]]; + assert(DD[j]); + } + + if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) { + if(data_buffer) { + free(data_buffer); + } + { + data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; + void *tmp = malloc(data_buffer_size); + if (tmp == NULL) { + error("error allocating data buffer"); + } + data_buffer = (double *) tmp; + } + } + + read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); + + // Dot product + for(j=0; j<numVectors; j++) + for(k=0; k<trackTable[track]; k++){ + qp=query+j*dbH->dim; + sp=data_buffer+k*dbH->dim; + DD[j][k]=0.0; // Initialize matched filter array + dp=&D[j][k]; // point to correlation cell j,k + *dp=0.0; // initialize correlation cell + l=dbH->dim; // size of vectors + while(l--) + *dp+=*qp++**sp++; + } + + // Matched Filter + // HOP SIZE == 1 + double* spd; + if(HOP_SIZE==1){ // HOP_SIZE = shingleHop + for(w=0; w<wL; w++) + for(j=0; j<numVectors-w; j++){ + sp=DD[j]; + spd=D[j+w]+w; + k=trackTable[track]-w; + while(k--) + *sp+++=*spd++; + } + } + + else{ // HOP_SIZE != 1 + for(w=0; w<wL; w++) + for(j=0; j<numVectors-w; j+=HOP_SIZE){ + sp=DD[j]; + spd=D[j+w]+w; + for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){ + *sp+=*spd; + sp+=HOP_SIZE; + spd+=HOP_SIZE; + } + } + } + + if(verbosity>3 && usingTimes) { + std::cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << std::endl; + std::cerr.flush(); + } + + if(!usingTimes || + (usingTimes + && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){ + + if(verbosity>3 && usingTimes) { + std::cerr << "within duration tolerance." << std::endl; + std::cerr.flush(); + } + + // Search for minimum distance by shingles (concatenated vectors) + for(j=0;j<=numVectors-wL;j+=HOP_SIZE) + for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ + thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; + if(verbosity>9) { + std::cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << std::endl; + } + // Gather chi^2 statistics + if(thisDist<minSample) + minSample=thisDist; + else if(thisDist>maxSample) + maxSample=thisDist; + if(thisDist>1e-9){ + sampleCount++; + sampleSum+=thisDist; + logSampleSum+=log(thisDist); + } + + // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); + // Power test + if (usingPower) { + if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) { + thisDist = 1000000.0; + } + } + + if(thisDist>=0 && thisDist<=radius){ + distances[0]++; // increment count + break; // only need one track point per query point + } + } + // How many points were below threshold ? + thisDist=distances[0]; + + // Let's see the distances then... + if(verbosity>3) { + std::cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << std::endl; + } + + // All the track stuff goes here + n=trackNN; + while(n--){ + if(thisDist>trackDistances[n]){ + if((n==0 || thisDist<=trackDistances[n-1])){ + // Copy all values above up the queue + for( l=trackNN-1 ; l > n ; l--){ + trackDistances[l]=trackDistances[l-1]; + trackQIndexes[l]=trackQIndexes[l-1]; + trackSIndexes[l]=trackSIndexes[l-1]; + trackIDs[l]=trackIDs[l-1]; + } + trackDistances[n]=thisDist; + trackQIndexes[n]=qIndexes[0]; + trackSIndexes[n]=sIndexes[0]; + successfulTracks++; + trackIDs[n]=track; + break; + } + } + else + break; + } + } // Duration match + + // Clean up current track + if(D!=NULL){ + for(j=0; j<numVectors; j++) + delete[] D[j]; + } + + if(DD!=NULL){ + for(j=0; j<numVectors; j++) + delete[] DD[j]; + } + } + // per-track reset array values + for(unsigned k=0; k<pointNN; k++){ + distances[k]=0.0; + qIndexes[k]=~0; + sIndexes[k]=~0; + } + } + + free(data_buffer); + + gettimeofday(&tv2,NULL); + if(verbosity>1) { + std::cerr << std::endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" + << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl; + std::cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum + << " minSample: " << minSample << " maxSample: " << maxSample << std::endl; + } + + if(adbQueryResponse==0){ + if(verbosity>1) { + std::cerr<<std::endl; + } + // Output answer + // Loop over nearest neighbours + for(k=0; k < std::min(trackNN,successfulTracks); k++) + std::cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << std::endl; + } + else{ // Process Web Services Query + int listLen = std::min(trackNN, processedTracks); + adbQueryResponse->result.__sizeRlist=listLen; + adbQueryResponse->result.__sizeDist=listLen; + adbQueryResponse->result.__sizeQpos=listLen; + adbQueryResponse->result.__sizeSpos=listLen; + adbQueryResponse->result.Rlist= new char*[listLen]; + adbQueryResponse->result.Dist = new double[listLen]; + adbQueryResponse->result.Qpos = new unsigned int[listLen]; + adbQueryResponse->result.Spos = new unsigned int[listLen]; + for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ + adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; + adbQueryResponse->result.Dist[k]=trackDistances[k]; + adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; + adbQueryResponse->result.Spos[k]=trackSIndexes[k]; + sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); + } + } + + // Clean up + if(trackOffsetTable) + delete[] trackOffsetTable; + if(queryCopy) + delete[] queryCopy; + if(qNorm) + delete[] qNorm; + if(sNorm) + delete[] sNorm; + if(qPower) + delete[] qPower; + if(sPower) + delete[] sPower; + if(D) + delete[] D; + if(DD) + delete[] DD; + if(timesdata) + delete[] timesdata; + if(querydurs) + delete[] querydurs; + if(meanDBdur) + delete[] meanDBdur; +} + +// Unit norm block of features +void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){ + unsigned d; + double L2, *p; + if(verbosity>2) { + std::cerr << "norming " << n << " vectors...";std::cerr.flush(); + } + while(n--){ + p=X; + L2=0.0; + d=dim; + while(d--){ + L2+=*p**p; + p++; + } + /* L2=sqrt(L2);*/ + if(qNorm) + *qNorm++=L2; + /* + oneOverL2 = 1.0/L2; + d=dim; + while(d--){ + *X*=oneOverL2; + X++; + */ + X+=dim; + } + if(verbosity>2) { + std::cerr << "done..." << std::endl; + } +}