changeset 206:3c7c8b84e4f3 refactoring

Delete pointQuery() and trackPointQuery(). New convention for tests: an exit code of 14 means "not applicable", as in n/a = 14/1. Decorate the newly-failing tests with "exit 14".
author mas01cr
date Wed, 28 Nov 2007 17:04:09 +0000
parents 9fcc8e97c86f
children 861e4bc95547
files audioDB.h query.cpp tests/0003/run-test.sh tests/0004/run-test.sh tests/0009/run-test.sh tests/0014/run-test.sh tests/0019/run-test.sh tests/0032/run-test.sh tests/run-tests.sh
diffstat 9 files changed, 19 insertions(+), 486 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB.h	Wed Nov 28 15:13:22 2007 +0000
+++ b/audioDB.h	Wed Nov 28 17:04:09 2007 +0000
@@ -191,8 +191,6 @@
     
   // private methods
   void error(const char* a, const char* b = "", const char *sysFunc = 0);
-  void pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
-  void trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
   void sequence_sum(double *buffer, int length, int seqlen);
   void sequence_sqrt(double *buffer, int length, int seqlen);
   void sequence_average(double *buffer, int length, int seqlen);
--- a/query.cpp	Wed Nov 28 15:13:22 2007 +0000
+++ b/query.cpp	Wed Nov 28 17:04:09 2007 +0000
@@ -14,27 +14,20 @@
   return true;
 }
 
-void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){  
-  switch(queryType){
-  case O2_POINT_QUERY:
-    pointQuery(dbName, inFile, adbQueryResponse);
-    break;
+void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
+  switch(queryType) {
   case O2_SEQUENCE_QUERY:
     if(radius==0)
       trackSequenceQueryNN(dbName, inFile, adbQueryResponse);
     else
       trackSequenceQueryRad(dbName, inFile, adbQueryResponse);
     break;
-  case O2_TRACK_QUERY:
-    trackPointQuery(dbName, inFile, adbQueryResponse);
-    break;
   default:
     error("unrecognized queryType in query()");
-    
   }  
 }
 
-//return ordinal position of key in keyTable
+// return ordinal position of key in keyTable
 unsigned audioDB::getKeyPos(char* key){  
   for(unsigned k=0; k<dbH->numFiles; k++)
     if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0)
@@ -43,478 +36,6 @@
   return O2_ERR_KEYNOTFOUND;
 }
 
-// Basic point query engine
-void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
-  
-  initTables(dbName, inFile);
-  
-  // For each input vector, find the closest pointNN matching output vectors and report
-  // we use stdout in this stub version
-  unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
-
-  double* query = (double*)(indata+sizeof(int));
-  CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
-  double* data = dataBuf;
-  double* queryCopy = 0;
-
-  if( dbH->flags & O2_FLAG_L2NORM ){
-    // Make a copy of the query
-    queryCopy = new double[numVectors*dbH->dim];
-    qNorm = new double[numVectors];
-    assert(queryCopy&&qNorm);
-    memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
-    unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
-    query = queryCopy;
-  }
-
-  // Make temporary dynamic memory for results
-  assert(pointNN>0 && pointNN<=O2_MAXNN);
-  double distances[pointNN];
-  unsigned qIndexes[pointNN];
-  unsigned sIndexes[pointNN];
-  for(unsigned k=0; k<pointNN; k++){
-    distances[k]=-DBL_MAX;
-    qIndexes[k]=~0;
-    sIndexes[k]=~0;    
-  }
-
-  unsigned j=numVectors; 
-  unsigned k,l,n;
-  double thisDist;
-
-  unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double));
-  double meanQdur = 0;
-  double *timesdata = 0;
-  double *querydurs = 0;
-  double *dbdurs = 0;
-
-  if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
-    std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl;
-    usingTimes=0;
-  }
-
-  else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
-    std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl;
-  
-  else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
-    timesdata = new double[2*numVectors];
-    querydurs = new double[numVectors];
-    insertTimeStamps(numVectors, timesFile, timesdata);
-    // Calculate durations of points
-    for(k=0; k<numVectors-1; k++){
-      querydurs[k]=timesdata[2*k+1]-timesdata[2*k];
-      meanQdur+=querydurs[k];
-    }
-    meanQdur/=k;
-    // Individual exhaustive timepoint durations
-    dbdurs = new double[totalVecs];
-    for(k=0; k<totalVecs-1; k++) {
-      dbdurs[k]=timesTable[2*k+1]-timesTable[2*k];
-    }
-  }
-
-  if(usingQueryPoint)
-    if(queryPoint>numVectors-1)
-      error("queryPoint > numVectors in query");
-    else{
-      if(verbosity>1) {
-	std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush();
-      }
-      query=query+queryPoint*dbH->dim;
-      numVectors=queryPoint+1;
-      j=1;
-    }
-
-  gettimeofday(&tv1, NULL);   
-  while(j--){ // query
-    data=dataBuf;
-    k=totalVecs; // number of database vectors
-    while(k--){  // database
-      thisDist=0;
-      l=dbH->dim;
-      double* q=query;
-      while(l--)
-	thisDist+=*q++**data++;
-      if(!usingTimes || 
-	 (usingTimes 
-	  && fabs(dbdurs[totalVecs-k-1]-querydurs[numVectors-j-1])<querydurs[numVectors-j-1]*timesTol)){
-	n=pointNN;
-	while(n--){
-	  if(thisDist>=distances[n]){
-	    if((n==0 || thisDist<=distances[n-1])){
-	      // Copy all values above up the queue
-	      for( l=pointNN-1 ; l >= n+1 ; l--){
-		distances[l]=distances[l-1];
-		qIndexes[l]=qIndexes[l-1];
-		sIndexes[l]=sIndexes[l-1];	      
-	      }
-	      distances[n]=thisDist;
-	      qIndexes[n]=numVectors-j-1;
-	      sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1;
-	      break;
-	    }
-	  }
-	  else
-	    break;
-	}
-      }
-    }
-    // Move query pointer to next query point
-    query+=dbH->dim;
-  }
-
-  gettimeofday(&tv2, NULL); 
-  if(verbosity>1) {
-    std::cerr << std::endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl;
-  }
-
-  if(adbQueryResponse==0){
-    // Output answer
-    // Loop over nearest neighbours    
-    for(k=0; k < pointNN; k++){
-      // Scan for key
-      unsigned cumTrack=0;
-      for(l=0 ; l<dbH->numFiles; l++){
-	cumTrack+=trackTable[l];
-	if(sIndexes[k]<cumTrack){
-	  std::cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " " 
-	       << sIndexes[k]+trackTable[l]-cumTrack << std::endl;
-	  break;
-	}
-      }
-    }
-  }
-  else{ // Process Web Services Query
-    int listLen;
-    for(k = 0; k < pointNN; k++) {
-      if(distances[k] == -DBL_MAX)
-        break;
-    }
-    listLen = k;
-
-    adbQueryResponse->result.__sizeRlist=listLen;
-    adbQueryResponse->result.__sizeDist=listLen;
-    adbQueryResponse->result.__sizeQpos=listLen;
-    adbQueryResponse->result.__sizeSpos=listLen;
-    adbQueryResponse->result.Rlist= new char*[listLen];
-    adbQueryResponse->result.Dist = new double[listLen];
-    adbQueryResponse->result.Qpos = new unsigned int[listLen];
-    adbQueryResponse->result.Spos = new unsigned int[listLen];
-    for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
-      adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
-      adbQueryResponse->result.Dist[k]=distances[k];
-      adbQueryResponse->result.Qpos[k]=qIndexes[k];
-      unsigned cumTrack=0;
-      for(l=0 ; l<dbH->numFiles; l++){
-	cumTrack+=trackTable[l];
-	if(sIndexes[k]<cumTrack){
-	  sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE);
-	  break;
-	}
-      }
-      adbQueryResponse->result.Spos[k]=sIndexes[k]+trackTable[l]-cumTrack;
-    }
-  }
-  
-  // Clean up
-  if(queryCopy)
-    delete queryCopy;
-  if(qNorm)
-    delete qNorm;
-  if(timesdata)
-    delete[] timesdata;
-  if(querydurs)
-    delete[] querydurs;
-  if(dbdurs)
-    delete dbdurs;
-}
-
-// trackPointQuery  
-// return the trackNN closest tracks to the query track
-// uses average of pointNN points per track 
-void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
-  initTables(dbName, inFile);
-  
-  // For each input vector, find the closest pointNN matching output vectors and report
-  unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
-  double* query = (double*)(indata+sizeof(int));
-  double* data;
-  double* queryCopy = 0;
-
-  if( dbH->flags & O2_FLAG_L2NORM ){
-    // Make a copy of the query
-    queryCopy = new double[numVectors*dbH->dim];
-    qNorm = new double[numVectors];
-    assert(queryCopy&&qNorm);
-    memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
-    unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
-    query = queryCopy;
-  }
-
-  assert(pointNN>0 && pointNN<=O2_MAXNN);
-  assert(trackNN>0 && trackNN<=O2_MAXNN);
-
-  // Make temporary dynamic memory for results
-  double trackDistances[trackNN];
-  unsigned trackIDs[trackNN];
-  unsigned trackQIndexes[trackNN];
-  unsigned trackSIndexes[trackNN];
-
-  double distances[pointNN];
-  unsigned qIndexes[pointNN];
-  unsigned sIndexes[pointNN];
-
-  unsigned j=numVectors; // number of query points
-  unsigned k,l,n, track, trackOffset=0, processedTracks=0;
-  double thisDist;
-
-  for(k=0; k<pointNN; k++){
-    distances[k]=-DBL_MAX;
-    qIndexes[k]=~0;
-    sIndexes[k]=~0;    
-  }
-
-  for(k=0; k<trackNN; k++){
-    trackDistances[k]=-DBL_MAX;
-    trackQIndexes[k]=~0;
-    trackSIndexes[k]=~0;
-    trackIDs[k]=~0;
-  }
-
-  double meanQdur = 0;
-  double *timesdata = 0;
-  double *querydurs = 0;
-  double *meanDBdur = 0;
-  
-  if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
-    std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl;
-    usingTimes=0;
-  }
-  
-  else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
-    std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl;
-  
-  else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
-    timesdata = new double[2*numVectors];
-    querydurs = new double[numVectors];
-    insertTimeStamps(numVectors, timesFile, timesdata);
-    // Calculate durations of points
-    for(k=0; k<numVectors-1; k++) {
-      querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
-      meanQdur += querydurs[k];
-    }
-    meanQdur/=k;
-    meanDBdur = new double[dbH->numFiles];
-    for(k=0; k<dbH->numFiles; k++){
-      meanDBdur[k]=0.0;
-      for(j=0; j<trackTable[k]-1 ; j++) {
-	meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
-      }
-      meanDBdur[k]/=j;
-    }
-  }
-
-  if(usingQueryPoint)
-    if(queryPoint>numVectors-1)
-      error("queryPoint > numVectors in query");
-    else{
-      if(verbosity>1) {
-	std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush();
-      }
-      query=query+queryPoint*dbH->dim;
-      numVectors=queryPoint+1;
-    }
-  
-  // build track offset table
-  off_t *trackOffsetTable = new off_t[dbH->numFiles];
-  unsigned cumTrack=0;
-  off_t trackIndexOffset;
-  for(k=0; k<dbH->numFiles;k++){
-    trackOffsetTable[k]=cumTrack;
-    cumTrack+=trackTable[k]*dbH->dim;
-  }
-
-  char nextKey[MAXSTR];
-
-  gettimeofday(&tv1, NULL); 
-
-  size_t data_buffer_size = 0;
-  double *data_buffer = 0;
-  lseek(dbfid, dbH->dataOffset, SEEK_SET);
-        
-  for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
-
-    trackOffset = trackOffsetTable[track];     // numDoubles offset
-
-    // get trackID from file if using a control file
-    if(trackFile) {
-      trackFile->getline(nextKey,MAXSTR);
-      if(!trackFile->eof()) {
-	track = getKeyPos(nextKey);
-        trackOffset = trackOffsetTable[track];
-        lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
-      } else {
-	break;
-      }
-    }
-
-    trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
-
-    if(verbosity>7) {
-      std::cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";std::cerr.flush();
-    }
-
-    if(dbH->flags & O2_FLAG_L2NORM)
-      usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy;
-    else
-      usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int));
-    if(usingQueryPoint)
-      j=1;
-    else
-      j=numVectors;
-
-    if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
-      if(data_buffer) {
-        free(data_buffer);
-      }
-      { 
-        data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
-        void *tmp = malloc(data_buffer_size);
-        if (tmp == NULL) {
-          error("error allocating data buffer");
-        }
-        data_buffer = (double *) tmp;
-      }
-    }
-
-    read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
-
-    while(j--){
-      k=trackTable[track];  // number of vectors in track
-      data=data_buffer; // data for track
-      while(k--){
-	thisDist=0;
-	l=dbH->dim;
-	double* q=query;
-	while(l--)
-	  thisDist+=*q++**data++;
-	if(!usingTimes || 
-	   (usingTimes 
-	    && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
-	  n=pointNN;
-	  while(n--){
-	    if(thisDist>=distances[n]){
-	      if((n==0 || thisDist<=distances[n-1])){
-		// Copy all values above up the queue
-		for( l=pointNN-1 ; l > n ; l--){
-		  distances[l]=distances[l-1];
-		  qIndexes[l]=qIndexes[l-1];
-		  sIndexes[l]=sIndexes[l-1];	      
-		}
-		distances[n]=thisDist;
-		qIndexes[n]=numVectors-j-1;
-		sIndexes[n]=trackTable[track]-k-1;
-		break;
-	      }
-	    }
-	    else
-	      break;
-	  }
-	}
-      } // track
-      // Move query pointer to next query point
-      query+=dbH->dim;
-    } // query 
-    // Take the average of this track's distance
-    // Test the track distances
-    thisDist=0;
-    for (n = 0; n < pointNN; n++) {
-      if (distances[n] == -DBL_MAX) break;
-      thisDist += distances[n];
-    }
-    thisDist /= n;
-
-    n=trackNN;
-    while(n--){
-      if(thisDist>=trackDistances[n]){
-	if((n==0 || thisDist<=trackDistances[n-1])){
-	  // Copy all values above up the queue
-	  for( l=trackNN-1 ; l > n ; l--){
-	    trackDistances[l]=trackDistances[l-1];
-	    trackQIndexes[l]=trackQIndexes[l-1];
-	    trackSIndexes[l]=trackSIndexes[l-1];
-	    trackIDs[l]=trackIDs[l-1];
-	  }
-	  trackDistances[n]=thisDist;
-	  trackQIndexes[n]=qIndexes[0];
-	  trackSIndexes[n]=sIndexes[0];
-	  trackIDs[n]=track;
-	  break;
-	}
-      }
-      else
-	break;
-    }
-    for(unsigned k=0; k<pointNN; k++){
-      distances[k]=-DBL_MAX;
-      qIndexes[k]=~0;
-      sIndexes[k]=~0;    
-    }
-  } // tracks
-
-  free(data_buffer);
-
-  gettimeofday(&tv2, NULL); 
-
-  if(verbosity>1) {
-    std::cerr << std::endl << "processed tracks :" << processedTracks 
-	 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl;
-  }
-
-  if(adbQueryResponse==0){
-    if(verbosity>1) {
-      std::cerr<<std::endl;
-    }
-    // Output answer
-    // Loop over nearest neighbours
-    for(k=0; k < std::min(trackNN,processedTracks); k++)
-      std::cout << fileTable+trackIDs[k]*O2_FILETABLESIZE 
-	   << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << std::endl;
-  }
-  else{ // Process Web Services Query
-    int listLen = std::min(trackNN, processedTracks);
-    adbQueryResponse->result.__sizeRlist=listLen;
-    adbQueryResponse->result.__sizeDist=listLen;
-    adbQueryResponse->result.__sizeQpos=listLen;
-    adbQueryResponse->result.__sizeSpos=listLen;
-    adbQueryResponse->result.Rlist= new char*[listLen];
-    adbQueryResponse->result.Dist = new double[listLen];
-    adbQueryResponse->result.Qpos = new unsigned int[listLen];
-    adbQueryResponse->result.Spos = new unsigned int[listLen];
-    for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
-      adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
-      adbQueryResponse->result.Dist[k]=trackDistances[k];
-      adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
-      adbQueryResponse->result.Spos[k]=trackSIndexes[k];
-      sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
-    }
-  }
-    
-  // Clean up
-  if(trackOffsetTable)
-    delete trackOffsetTable;
-  if(queryCopy)
-    delete queryCopy;
-  if(qNorm)
-    delete qNorm;
-  if(timesdata)
-    delete[] timesdata;
-  if(querydurs)
-    delete[] querydurs;
-  if(meanDBdur)
-    delete meanDBdur;
-}
-
 // This is a common pattern in sequence queries: what we are doing is
 // taking a window of length seqlen over a buffer of length length,
 // and placing the sum of the elements in that window in the first
--- a/tests/0003/run-test.sh	Wed Nov 28 15:13:22 2007 +0000
+++ b/tests/0003/run-test.sh	Wed Nov 28 17:04:09 2007 +0000
@@ -1,5 +1,7 @@
 #! /bin/sh
 
+exit 14
+
 . ../test-utils.sh
 
 if [ -f testdb ]; then rm -f testdb; fi
--- a/tests/0004/run-test.sh	Wed Nov 28 15:13:22 2007 +0000
+++ b/tests/0004/run-test.sh	Wed Nov 28 17:04:09 2007 +0000
@@ -1,5 +1,7 @@
 #! /bin/sh
 
+exit 14
+
 . ../test-utils.sh
 
 if [ -f testdb ]; then rm -f testdb; fi
--- a/tests/0009/run-test.sh	Wed Nov 28 15:13:22 2007 +0000
+++ b/tests/0009/run-test.sh	Wed Nov 28 17:04:09 2007 +0000
@@ -1,5 +1,7 @@
 #! /bin/sh
 
+exit 14
+
 . ../test-utils.sh
 
 if [ -f testdb ]; then rm -f testdb; fi
--- a/tests/0014/run-test.sh	Wed Nov 28 15:13:22 2007 +0000
+++ b/tests/0014/run-test.sh	Wed Nov 28 17:04:09 2007 +0000
@@ -1,5 +1,7 @@
 #! /bin/sh
 
+exit 14
+
 . ../test-utils.sh
 
 if [ -f testdb ]; then rm -f testdb; fi
--- a/tests/0019/run-test.sh	Wed Nov 28 15:13:22 2007 +0000
+++ b/tests/0019/run-test.sh	Wed Nov 28 17:04:09 2007 +0000
@@ -1,5 +1,7 @@
 #! /bin/sh
 
+exit 14
+
 . ../test-utils.sh
 
 if [ -f testdb ]; then rm -f testdb; fi
--- a/tests/0032/run-test.sh	Wed Nov 28 15:13:22 2007 +0000
+++ b/tests/0032/run-test.sh	Wed Nov 28 17:04:09 2007 +0000
@@ -1,5 +1,7 @@
 #! /bin/sh
 
+exit 14
+
 . ../test-utils.sh
 
 if [ -f testdb ]; then rm -f testdb; fi
--- a/tests/run-tests.sh	Wed Nov 28 15:13:22 2007 +0000
+++ b/tests/run-tests.sh	Wed Nov 28 17:04:09 2007 +0000
@@ -26,8 +26,10 @@
       echo -n :
       (cd ${file} && sh ./run-test.sh > test.out 2> test.err)
       EXIT_STATUS=$?
-      if [ ${EXIT_STATUS} -ne 104 ]; then
-        echo " failed (exit status ${EXIT_STATUS})".
+      if [ ${EXIT_STATUS} -eq 14 ]; then
+        echo " n/a."
+      elif [ ${EXIT_STATUS} -ne 104 ]; then
+        echo " failed (exit status ${EXIT_STATUS})."
         FAILED=true
       else
         echo " success."