Mercurial > hg > audiodb
changeset 206:3c7c8b84e4f3 refactoring
Delete pointQuery() and trackPointQuery()
New convention for tests: an exit code of 14 means "not applicable", as
in n/a = 14/1 (n is the 14th letter of the alphabet, a the 1st). Decorate the newly-failing tests with "exit 14".
author | mas01cr |
---|---|
date | Wed, 28 Nov 2007 17:04:09 +0000 |
parents | 9fcc8e97c86f |
children | 861e4bc95547 |
files | audioDB.h query.cpp tests/0003/run-test.sh tests/0004/run-test.sh tests/0009/run-test.sh tests/0014/run-test.sh tests/0019/run-test.sh tests/0032/run-test.sh tests/run-tests.sh |
diffstat | 9 files changed, 19 insertions(+), 486 deletions(-) [+] |
line wrap: on
line diff
--- a/audioDB.h Wed Nov 28 15:13:22 2007 +0000 +++ b/audioDB.h Wed Nov 28 17:04:09 2007 +0000 @@ -191,8 +191,6 @@ // private methods void error(const char* a, const char* b = "", const char *sysFunc = 0); - void pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0); - void trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0); void sequence_sum(double *buffer, int length, int seqlen); void sequence_sqrt(double *buffer, int length, int seqlen); void sequence_average(double *buffer, int length, int seqlen);
--- a/query.cpp Wed Nov 28 15:13:22 2007 +0000 +++ b/query.cpp Wed Nov 28 17:04:09 2007 +0000 @@ -14,27 +14,20 @@ return true; } -void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ - switch(queryType){ - case O2_POINT_QUERY: - pointQuery(dbName, inFile, adbQueryResponse); - break; +void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { + switch(queryType) { case O2_SEQUENCE_QUERY: if(radius==0) trackSequenceQueryNN(dbName, inFile, adbQueryResponse); else trackSequenceQueryRad(dbName, inFile, adbQueryResponse); break; - case O2_TRACK_QUERY: - trackPointQuery(dbName, inFile, adbQueryResponse); - break; default: error("unrecognized queryType in query()"); - } } -//return ordinal position of key in keyTable +// return ordinal position of key in keyTable unsigned audioDB::getKeyPos(char* key){ for(unsigned k=0; k<dbH->numFiles; k++) if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) @@ -43,478 +36,6 @@ return O2_ERR_KEYNOTFOUND; } -// Basic point query engine -void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { - - initTables(dbName, inFile); - - // For each input vector, find the closest pointNN matching output vectors and report - // we use stdout in this stub version - unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); - - double* query = (double*)(indata+sizeof(int)); - CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); - double* data = dataBuf; - double* queryCopy = 0; - - if( dbH->flags & O2_FLAG_L2NORM ){ - // Make a copy of the query - queryCopy = new double[numVectors*dbH->dim]; - qNorm = new double[numVectors]; - assert(queryCopy&&qNorm); - memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); - unitNorm(queryCopy, dbH->dim, numVectors, qNorm); - query = queryCopy; - } - - // Make temporary dynamic memory for results - assert(pointNN>0 && 
pointNN<=O2_MAXNN); - double distances[pointNN]; - unsigned qIndexes[pointNN]; - unsigned sIndexes[pointNN]; - for(unsigned k=0; k<pointNN; k++){ - distances[k]=-DBL_MAX; - qIndexes[k]=~0; - sIndexes[k]=~0; - } - - unsigned j=numVectors; - unsigned k,l,n; - double thisDist; - - unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double)); - double meanQdur = 0; - double *timesdata = 0; - double *querydurs = 0; - double *dbdurs = 0; - - if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ - std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl; - usingTimes=0; - } - - else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) - std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl; - - else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ - timesdata = new double[2*numVectors]; - querydurs = new double[numVectors]; - insertTimeStamps(numVectors, timesFile, timesdata); - // Calculate durations of points - for(k=0; k<numVectors-1; k++){ - querydurs[k]=timesdata[2*k+1]-timesdata[2*k]; - meanQdur+=querydurs[k]; - } - meanQdur/=k; - // Individual exhaustive timepoint durations - dbdurs = new double[totalVecs]; - for(k=0; k<totalVecs-1; k++) { - dbdurs[k]=timesTable[2*k+1]-timesTable[2*k]; - } - } - - if(usingQueryPoint) - if(queryPoint>numVectors-1) - error("queryPoint > numVectors in query"); - else{ - if(verbosity>1) { - std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush(); - } - query=query+queryPoint*dbH->dim; - numVectors=queryPoint+1; - j=1; - } - - gettimeofday(&tv1, NULL); - while(j--){ // query - data=dataBuf; - k=totalVecs; // number of database vectors - while(k--){ // database - thisDist=0; - l=dbH->dim; - double* q=query; - while(l--) - thisDist+=*q++**data++; - if(!usingTimes || - (usingTimes - && fabs(dbdurs[totalVecs-k-1]-querydurs[numVectors-j-1])<querydurs[numVectors-j-1]*timesTol)){ - n=pointNN; - while(n--){ - if(thisDist>=distances[n]){ - if((n==0 || 
thisDist<=distances[n-1])){ - // Copy all values above up the queue - for( l=pointNN-1 ; l >= n+1 ; l--){ - distances[l]=distances[l-1]; - qIndexes[l]=qIndexes[l-1]; - sIndexes[l]=sIndexes[l-1]; - } - distances[n]=thisDist; - qIndexes[n]=numVectors-j-1; - sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1; - break; - } - } - else - break; - } - } - } - // Move query pointer to next query point - query+=dbH->dim; - } - - gettimeofday(&tv2, NULL); - if(verbosity>1) { - std::cerr << std::endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl; - } - - if(adbQueryResponse==0){ - // Output answer - // Loop over nearest neighbours - for(k=0; k < pointNN; k++){ - // Scan for key - unsigned cumTrack=0; - for(l=0 ; l<dbH->numFiles; l++){ - cumTrack+=trackTable[l]; - if(sIndexes[k]<cumTrack){ - std::cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " " - << sIndexes[k]+trackTable[l]-cumTrack << std::endl; - break; - } - } - } - } - else{ // Process Web Services Query - int listLen; - for(k = 0; k < pointNN; k++) { - if(distances[k] == -DBL_MAX) - break; - } - listLen = k; - - adbQueryResponse->result.__sizeRlist=listLen; - adbQueryResponse->result.__sizeDist=listLen; - adbQueryResponse->result.__sizeQpos=listLen; - adbQueryResponse->result.__sizeSpos=listLen; - adbQueryResponse->result.Rlist= new char*[listLen]; - adbQueryResponse->result.Dist = new double[listLen]; - adbQueryResponse->result.Qpos = new unsigned int[listLen]; - adbQueryResponse->result.Spos = new unsigned int[listLen]; - for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ - adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; - adbQueryResponse->result.Dist[k]=distances[k]; - adbQueryResponse->result.Qpos[k]=qIndexes[k]; - unsigned cumTrack=0; - for(l=0 ; l<dbH->numFiles; l++){ - cumTrack+=trackTable[l]; - if(sIndexes[k]<cumTrack){ - sprintf(adbQueryResponse->result.Rlist[k], 
"%s", fileTable+l*O2_FILETABLESIZE); - break; - } - } - adbQueryResponse->result.Spos[k]=sIndexes[k]+trackTable[l]-cumTrack; - } - } - - // Clean up - if(queryCopy) - delete queryCopy; - if(qNorm) - delete qNorm; - if(timesdata) - delete[] timesdata; - if(querydurs) - delete[] querydurs; - if(dbdurs) - delete dbdurs; -} - -// trackPointQuery -// return the trackNN closest tracks to the query track -// uses average of pointNN points per track -void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { - initTables(dbName, inFile); - - // For each input vector, find the closest pointNN matching output vectors and report - unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); - double* query = (double*)(indata+sizeof(int)); - double* data; - double* queryCopy = 0; - - if( dbH->flags & O2_FLAG_L2NORM ){ - // Make a copy of the query - queryCopy = new double[numVectors*dbH->dim]; - qNorm = new double[numVectors]; - assert(queryCopy&&qNorm); - memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); - unitNorm(queryCopy, dbH->dim, numVectors, qNorm); - query = queryCopy; - } - - assert(pointNN>0 && pointNN<=O2_MAXNN); - assert(trackNN>0 && trackNN<=O2_MAXNN); - - // Make temporary dynamic memory for results - double trackDistances[trackNN]; - unsigned trackIDs[trackNN]; - unsigned trackQIndexes[trackNN]; - unsigned trackSIndexes[trackNN]; - - double distances[pointNN]; - unsigned qIndexes[pointNN]; - unsigned sIndexes[pointNN]; - - unsigned j=numVectors; // number of query points - unsigned k,l,n, track, trackOffset=0, processedTracks=0; - double thisDist; - - for(k=0; k<pointNN; k++){ - distances[k]=-DBL_MAX; - qIndexes[k]=~0; - sIndexes[k]=~0; - } - - for(k=0; k<trackNN; k++){ - trackDistances[k]=-DBL_MAX; - trackQIndexes[k]=~0; - trackSIndexes[k]=~0; - trackIDs[k]=~0; - } - - double meanQdur = 0; - double *timesdata = 0; - double *querydurs = 0; - double *meanDBdur = 0; - - if(usingTimes 
&& !(dbH->flags & O2_FLAG_TIMES)){ - std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl; - usingTimes=0; - } - - else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) - std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl; - - else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ - timesdata = new double[2*numVectors]; - querydurs = new double[numVectors]; - insertTimeStamps(numVectors, timesFile, timesdata); - // Calculate durations of points - for(k=0; k<numVectors-1; k++) { - querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; - meanQdur += querydurs[k]; - } - meanQdur/=k; - meanDBdur = new double[dbH->numFiles]; - for(k=0; k<dbH->numFiles; k++){ - meanDBdur[k]=0.0; - for(j=0; j<trackTable[k]-1 ; j++) { - meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j]; - } - meanDBdur[k]/=j; - } - } - - if(usingQueryPoint) - if(queryPoint>numVectors-1) - error("queryPoint > numVectors in query"); - else{ - if(verbosity>1) { - std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush(); - } - query=query+queryPoint*dbH->dim; - numVectors=queryPoint+1; - } - - // build track offset table - off_t *trackOffsetTable = new off_t[dbH->numFiles]; - unsigned cumTrack=0; - off_t trackIndexOffset; - for(k=0; k<dbH->numFiles;k++){ - trackOffsetTable[k]=cumTrack; - cumTrack+=trackTable[k]*dbH->dim; - } - - char nextKey[MAXSTR]; - - gettimeofday(&tv1, NULL); - - size_t data_buffer_size = 0; - double *data_buffer = 0; - lseek(dbfid, dbH->dataOffset, SEEK_SET); - - for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ - - trackOffset = trackOffsetTable[track]; // numDoubles offset - - // get trackID from file if using a control file - if(trackFile) { - trackFile->getline(nextKey,MAXSTR); - if(!trackFile->eof()) { - track = getKeyPos(nextKey); - trackOffset = trackOffsetTable[track]; - lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); - 
} else { - break; - } - } - - trackIndexOffset=trackOffset/dbH->dim; // numVectors offset - - if(verbosity>7) { - std::cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";std::cerr.flush(); - } - - if(dbH->flags & O2_FLAG_L2NORM) - usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy; - else - usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); - if(usingQueryPoint) - j=1; - else - j=numVectors; - - if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) { - if(data_buffer) { - free(data_buffer); - } - { - data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; - void *tmp = malloc(data_buffer_size); - if (tmp == NULL) { - error("error allocating data buffer"); - } - data_buffer = (double *) tmp; - } - } - - read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); - - while(j--){ - k=trackTable[track]; // number of vectors in track - data=data_buffer; // data for track - while(k--){ - thisDist=0; - l=dbH->dim; - double* q=query; - while(l--) - thisDist+=*q++**data++; - if(!usingTimes || - (usingTimes - && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){ - n=pointNN; - while(n--){ - if(thisDist>=distances[n]){ - if((n==0 || thisDist<=distances[n-1])){ - // Copy all values above up the queue - for( l=pointNN-1 ; l > n ; l--){ - distances[l]=distances[l-1]; - qIndexes[l]=qIndexes[l-1]; - sIndexes[l]=sIndexes[l-1]; - } - distances[n]=thisDist; - qIndexes[n]=numVectors-j-1; - sIndexes[n]=trackTable[track]-k-1; - break; - } - } - else - break; - } - } - } // track - // Move query pointer to next query point - query+=dbH->dim; - } // query - // Take the average of this track's distance - // Test the track distances - thisDist=0; - for (n = 0; n < pointNN; n++) { - if (distances[n] == -DBL_MAX) break; - thisDist += distances[n]; - } - thisDist /= n; - - n=trackNN; - while(n--){ - if(thisDist>=trackDistances[n]){ - if((n==0 || 
thisDist<=trackDistances[n-1])){ - // Copy all values above up the queue - for( l=trackNN-1 ; l > n ; l--){ - trackDistances[l]=trackDistances[l-1]; - trackQIndexes[l]=trackQIndexes[l-1]; - trackSIndexes[l]=trackSIndexes[l-1]; - trackIDs[l]=trackIDs[l-1]; - } - trackDistances[n]=thisDist; - trackQIndexes[n]=qIndexes[0]; - trackSIndexes[n]=sIndexes[0]; - trackIDs[n]=track; - break; - } - } - else - break; - } - for(unsigned k=0; k<pointNN; k++){ - distances[k]=-DBL_MAX; - qIndexes[k]=~0; - sIndexes[k]=~0; - } - } // tracks - - free(data_buffer); - - gettimeofday(&tv2, NULL); - - if(verbosity>1) { - std::cerr << std::endl << "processed tracks :" << processedTracks - << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl; - } - - if(adbQueryResponse==0){ - if(verbosity>1) { - std::cerr<<std::endl; - } - // Output answer - // Loop over nearest neighbours - for(k=0; k < std::min(trackNN,processedTracks); k++) - std::cout << fileTable+trackIDs[k]*O2_FILETABLESIZE - << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << std::endl; - } - else{ // Process Web Services Query - int listLen = std::min(trackNN, processedTracks); - adbQueryResponse->result.__sizeRlist=listLen; - adbQueryResponse->result.__sizeDist=listLen; - adbQueryResponse->result.__sizeQpos=listLen; - adbQueryResponse->result.__sizeSpos=listLen; - adbQueryResponse->result.Rlist= new char*[listLen]; - adbQueryResponse->result.Dist = new double[listLen]; - adbQueryResponse->result.Qpos = new unsigned int[listLen]; - adbQueryResponse->result.Spos = new unsigned int[listLen]; - for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ - adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; - adbQueryResponse->result.Dist[k]=trackDistances[k]; - adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; - adbQueryResponse->result.Spos[k]=trackSIndexes[k]; - sprintf(adbQueryResponse->result.Rlist[k], "%s", 
fileTable+trackIDs[k]*O2_FILETABLESIZE); - } - } - - // Clean up - if(trackOffsetTable) - delete trackOffsetTable; - if(queryCopy) - delete queryCopy; - if(qNorm) - delete qNorm; - if(timesdata) - delete[] timesdata; - if(querydurs) - delete[] querydurs; - if(meanDBdur) - delete meanDBdur; -} - // This is a common pattern in sequence queries: what we are doing is // taking a window of length seqlen over a buffer of length length, // and placing the sum of the elements in that window in the first
--- a/tests/0003/run-test.sh Wed Nov 28 15:13:22 2007 +0000 +++ b/tests/0003/run-test.sh Wed Nov 28 17:04:09 2007 +0000 @@ -1,5 +1,7 @@ #! /bin/sh +exit 14 + . ../test-utils.sh if [ -f testdb ]; then rm -f testdb; fi
--- a/tests/0004/run-test.sh Wed Nov 28 15:13:22 2007 +0000 +++ b/tests/0004/run-test.sh Wed Nov 28 17:04:09 2007 +0000 @@ -1,5 +1,7 @@ #! /bin/sh +exit 14 + . ../test-utils.sh if [ -f testdb ]; then rm -f testdb; fi
--- a/tests/0009/run-test.sh Wed Nov 28 15:13:22 2007 +0000 +++ b/tests/0009/run-test.sh Wed Nov 28 17:04:09 2007 +0000 @@ -1,5 +1,7 @@ #! /bin/sh +exit 14 + . ../test-utils.sh if [ -f testdb ]; then rm -f testdb; fi
--- a/tests/0014/run-test.sh Wed Nov 28 15:13:22 2007 +0000 +++ b/tests/0014/run-test.sh Wed Nov 28 17:04:09 2007 +0000 @@ -1,5 +1,7 @@ #! /bin/sh +exit 14 + . ../test-utils.sh if [ -f testdb ]; then rm -f testdb; fi
--- a/tests/0019/run-test.sh Wed Nov 28 15:13:22 2007 +0000 +++ b/tests/0019/run-test.sh Wed Nov 28 17:04:09 2007 +0000 @@ -1,5 +1,7 @@ #! /bin/sh +exit 14 + . ../test-utils.sh if [ -f testdb ]; then rm -f testdb; fi
--- a/tests/0032/run-test.sh Wed Nov 28 15:13:22 2007 +0000 +++ b/tests/0032/run-test.sh Wed Nov 28 17:04:09 2007 +0000 @@ -1,5 +1,7 @@ #! /bin/sh +exit 14 + . ../test-utils.sh if [ -f testdb ]; then rm -f testdb; fi
--- a/tests/run-tests.sh Wed Nov 28 15:13:22 2007 +0000 +++ b/tests/run-tests.sh Wed Nov 28 17:04:09 2007 +0000 @@ -26,8 +26,10 @@ echo -n : (cd ${file} && sh ./run-test.sh > test.out 2> test.err) EXIT_STATUS=$? - if [ ${EXIT_STATUS} -ne 104 ]; then - echo " failed (exit status ${EXIT_STATUS})". + if [ ${EXIT_STATUS} -eq 14 ]; then + echo " n/a." + elif [ ${EXIT_STATUS} -ne 104 ]; then + echo " failed (exit status ${EXIT_STATUS})." FAILED=true else echo " success."