Mercurial > hg > audiodb

--- a/Makefile	Wed Aug 29 16:24:29 2007 +0000
+++ b/Makefile	Mon Oct 01 14:40:08 2007 +0000
@@ -3,6 +3,8 @@

 EXECUTABLE=audioDB

+.PHONY: all clean test
+
 all: ${EXECUTABLE}

 ${EXECUTABLE}.1: ${EXECUTABLE}
@@ -17,12 +19,17 @@
 soapServer.cpp soapClient.cpp soapC.cpp: audioDBws.h
 	soapcpp2 audioDBws.h

-${EXECUTABLE}: audioDB.h audioDB.cpp soapServer.cpp soapClient.cpp soapC.cpp cmdline.c cmdline.h
-	g++ -o ${EXECUTABLE} ${CFLAGS} audioDB.cpp soapServer.cpp soapClient.cpp soapC.cpp cmdline.c ${LIBS}
+${EXECUTABLE}: audioDB.h audioDB.cpp soapServer.cpp soapClient.cpp soapC.cpp cmdline.c cmdline.h # audioDB.h listed so header edits trigger a rebuild
+	g++ -c ${CFLAGS} -Wall -Werror audioDB.cpp
+	g++ -o ${EXECUTABLE} ${CFLAGS} audioDB.o soapServer.cpp soapClient.cpp soapC.cpp cmdline.c ${LIBS}

 clean:
 	-rm cmdline.c cmdline.h
 	-rm soapServer.cpp soapClient.cpp soapC.cpp soapObject.h soapStub.h soapProxy.h soapH.h soapServerLib.cpp soapClientLib.cpp
 	-rm adb.nsmap adb.xsd adb.wsdl adb.query.req.xml adb.query.res.xml adb.status.req.xml adb.status.res.xml
 	-rm README.txt
-	-rm ${EXECUTABLE} ${EXECUTABLE}.1
+	-rm ${EXECUTABLE} ${EXECUTABLE}.1 audioDB.o
+	-sh -c "cd tests && sh ./clean.sh"
+
+test: ${EXECUTABLE} # builds the binary first; a failing test script now fails 'make test'
+	cd tests && sh ./run-tests.sh
--- a/audioDB.cpp	Wed Aug 29 16:24:29 2007 +0000
+++ b/audioDB.cpp	Mon Oct 01 14:40:08 2007 +0000
@@ -1,146 +1,62 @@
-/* audioDB.cpp
-
-audioDB version 1.0
-
-A feature vector database management system for content-based retrieval.
-
-Usage: audioDB [OPTIONS]...
-
-      --full-help              Print help, including hidden options, and exit
-  -V, --version                Print version and exit
-  -H, --help                   print help on audioDB usage and exit.
-  -v, --verbosity=detail       level of detail of operational information.
-                                 (default=`1')
-
-Database Setup:
-  All database operations require a database argument.
-
-  Database commands are UPPER CASE. Command options are lower case.
-
-  -d, --database=filename      database file required by Database commands.
-  -N, --NEW                    make a new (initially empty) database.
-  -S, --STATUS                 output database information to stdout.
-  -D, --DUMP                   output all entries: index key size.
-  -L, --L2NORM                 unit norm vectors and norm all future inserts.
-
-Database Insertion:
-  The following commands insert feature files, with optional keys and
-  timestamps.
-
-  -I, --INSERT                 add feature vectors to an existing database.
-  -U, --UPDATE                 replace inserted vectors associated with key
-                                 with new input vectors.
-  -f, --features=filename      binary series of vectors file {int sz:ieee
-                                 double[][sz]:eof}.
-  -t, --times=filename         list of time points (ascii) for feature vectors.
-  -k, --key=identifier         unique identifier associated with features.
-
-  -B, --BATCHINSERT            add feature vectors named in a --featureList
-                                 file (with optional keys in a --keyList file)
-                                 to the named database.
-  -F, --featureList=filename   text file containing list of binary feature
-                                 vector files to process
-  -T, --timesList=filename     text file containing list of ascii --times for
-                                 each --features file in --featureList.
-  -K, --keyList=filename       text file containing list of unique identifiers
-                                 associated with --features.
-
-Database Search:
-  Thse commands control the retrieval behaviour.
-
-  -Q, --QUERY=searchtype       content-based search on --database using
-                                 --features as a query. Optionally restrict the
-                                 search to those tracks identified in a
-                                 --keyList.  (possible values="point",
-                                 "track", "sequence")
-  -p, --qpoint=position        ordinal position of query start point in
-                                 --features file.  (default=`0')
-  -e, --exhaustive             exhaustive search: iterate through all query
-                                 vectors in search. Overrides --qpoint.
-                                 (default=off)
-  -n, --pointnn=numpoints      number of point nearest neighbours to use in
-                                 retrieval.  (default=`10')
-  -R, --radius=DOUBLE          radius search, returns all
-                                 points/tracks/sequences inside given radius.
-                                  (default=`1.0')
-  -x, --expandfactor=DOUBLE    time compress/expand factor of result length to
-                                 query length [1.0 .. 100.0].  (default=`1.1')
-  -o, --rotate                 rotate query vectors for rotationally invariant
-                                 search.  (default=off)
-  -r, --resultlength=length    maximum length of the result list.
-                                 (default=`10')
-  -l, --sequencelength=length  length of sequences for sequence search.
-                                 (default=`16')
-  -h, --sequencehop=hop        hop size of sequence window for sequence search.
-                                  (default=`1')
-
-Web Services:
-  These commands enable the database process to establish a connection via the
-  internet and operate as separate client and server processes.
-
-  -s, --SERVER=port            run as standalone web service on named port.
-                                 (default=`80011')
-  -c, --client=hostname:port   run as a client using named host service.
-
-  Copyright (C) 2007 Michael Casey, Goldsmiths, University of London
-
-  outputs:
-
-  key1 distance1 qpos1 spos1
-  key2 distance2 qpos2 spos2
-  ...
-  keyN distanceN qposN sposN
-
-*/
-
 #include "audioDB.h"

 #define O2_DEBUG

 void audioDB::error(const char* a, const char* b, const char *sysFunc) {
-  cerr << a << ": " << b << endl;
-  if (sysFunc) {
-    perror(sysFunc);
+  if(isServer) {
+    char *err = new char[256]; /* FIXME: fixed-size buffer; snprintf below truncates longer messages */
+    snprintf(err, 255, "%s: %s\n%s", a, b, sysFunc ? strerror(errno) : "");
+    /* FIXME: actually we could usefully do with a properly structured
+       type, so that we can throw separate faultstring and details.
+       -- CSR, 2007-10-01.  NOTE(review): err is not freed here; confirm the catcher delete[]s it. */
+    throw(err);
+  } else {
+    cerr << a << ": " << b << endl;
+    if (sysFunc) {
+      perror(sysFunc);
+    }
+    exit(1);
   }
-  exit(1);
 }

-audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult):
-  dim(0),
-  dbName(0),
-  inFile(0),
-  key(0),
-  trackFile(0),
-  trackFileName(0),
-  timesFile(0),
-  timesFileName(0),
-  usingTimes(0),
-  command(0),
-  dbfid(0),
-  db(0),
-  dbH(0),
-  infid(0),
-  indata(0),
-  queryType(O2_FLAG_POINT_QUERY),
-  verbosity(1),
-  pointNN(O2_DEFAULT_POINTNN),
-  trackNN(O2_DEFAULT_TRACKNN),
-  trackTable(0),
-  fileTable(0),
-  dataBuf(0),
-  l2normTable(0),
-  timesTable(0),
-  qNorm(0),
-  sequenceLength(16),
-  sequenceHop(1),
-  queryPoint(0),
-  usingQueryPoint(0),
-  isClient(0),
-  isServer(0),
-  port(0),
-  timesTol(0.1),
-  radius(0){
-
+#define O2_AUDIODB_INITIALIZERS \
+  dim(0), \
+  dbName(0), \
+  inFile(0), \
+  key(0), \
+  trackFileName(0), \
+  trackFile(0), \
+  command(0), \
+  timesFileName(0), \
+  timesFile(0), \
+  dbfid(0), \
+  infid(0), \
+  db(0), \
+  indata(0), \
+  dbH(0), \
+  fileTable(0), \
+  trackTable(0), \
+  dataBuf(0), \
+  l2normTable(0), \
+  qNorm(0), \
+  timesTable(0), \
+  verbosity(1), \
+  queryType(O2_FLAG_POINT_QUERY), \
+  pointNN(O2_DEFAULT_POINTNN), \
+  trackNN(O2_DEFAULT_TRACKNN), \
+  sequenceLength(16), \
+  sequenceHop(1), \
+  queryPoint(0), \
+  usingQueryPoint(0), \
+  usingTimes(0), \
+  isClient(0), \
+  isServer(0), \
+  port(0), \
+  timesTol(0.1), \
+  radius(0)
+
+audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS
+{
   if(processArgs(argc, argv)<0){
     printf("No command found.\n");
     cmdline_parser_print_version ();
@@ -152,7 +68,7 @@
     printf("%s\n", gengetopt_args_info_help[0]);
     exit(1);
   }
-
+
   if(O2_ACTION(COM_SERVER))
     startServer();

@@ -169,7 +85,7 @@
     if(isClient)
       ws_query(dbName, inFile, (char*)hostport);
     else
-      query(dbName, inFile, adbQueryResult);
+      query(dbName, inFile);

   else if(O2_ACTION(COM_STATUS))
     if(isClient)
@@ -187,6 +103,22 @@
     error("Unrecognized command",command);
 }

+audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult): O2_AUDIODB_INITIALIZERS
+{ // Server-side query entry point: parses argv, then runs query() with adbQueryResult as the output.
+  isServer = 1; // FIXME: Hack.  Set before parsing so any error() raised there throws instead of calling exit(1).
+  processArgs(argc, argv);
+  assert(O2_ACTION(COM_QUERY));
+  query(dbName, inFile, adbQueryResult);
+}
+
+audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResult *adbStatusResult): O2_AUDIODB_INITIALIZERS
+{ // Server-side status entry point: parses argv, then runs status() with adbStatusResult as the output.
+  isServer = 1; // FIXME: Hack.  Set before parsing so any error() raised there throws instead of calling exit(1).
+  processArgs(argc, argv);
+  assert(O2_ACTION(COM_STATUS));
+  status(dbName, adbStatusResult);
+}
+
 audioDB::~audioDB(){
   // Clean up
   if(indata)
@@ -233,12 +165,12 @@
   if(args_info.radius_given){
     radius=args_info.radius_arg;
     if(radius<=0 || radius>1000000000){
-      cerr << "Warning: radius out of range" << endl;
-      exit(1);
+      error("radius out of range");
     }
     else
-      if(verbosity>3)
+      if(verbosity>3) {
 	cerr << "Setting radius to " << radius << endl;
+      }
   }

   if(args_info.SERVER_given){
@@ -483,8 +415,9 @@
     error("write error", "", "write");

   // mmap the output file
-  if(verbosity)
+  if(verbosity) {
     cerr << "header size:" << O2_HEADERSIZE << endl;
+  }
   if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
 			 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
     error("mmap error for creating database", "", "mmap");
@@ -500,9 +433,9 @@
   dbH->flags=0; //O2_FLAG_L2NORM;

   memcpy (db, dbH, O2_HEADERSIZE);
-  if(verbosity)
+  if(verbosity) {
     cerr << COM_CREATE << " " << dbName << endl;
-
+  }
 }


@@ -597,16 +530,18 @@
     }

   if(alreadyInserted){
-    if(verbosity)
+    if(verbosity) {
       cerr << "Warning: key already exists in database, ignoring: " <<inFile << endl;
+    }
     return;
   }

   // Make a track index table of features to file indexes
   unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
   if(!numVectors){
-    if(verbosity)
+    if(verbosity) {
       cerr << "Warning: ignoring zero-length feature vector file:" << key << endl;
+    }
     // CLEAN UP
     munmap(indata,statbuf.st_size);
     munmap(db,O2_DEFAULTDBSIZE);
@@ -646,9 +581,10 @@

   // Report status
   status(dbName);
-  if(verbosity)
+  if(verbosity) {
     cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors "
 	 << (statbuf.st_size-sizeof(int)) << " bytes." << endl;
+  }

   // CLEAN UP
   munmap(indata,statbuf.st_size);
@@ -701,8 +637,9 @@
        cerr << "expected " << numVectors << " found " << numtimes << endl;
        error("Times file is incorrect length for features file",inFile);
      }
-     if(verbosity>2)
+     if(verbosity>2) {
        cerr << "numtimes: " << numtimes << endl;
+     }
    }
  }
 }
@@ -816,16 +753,18 @@
       }

     if(alreadyInserted){
-      if(verbosity)
+      if(verbosity) {
 	cerr << "Warning: key already exists in database:" << thisKey << endl;
+      }
     }
     else{

       // Make a track index table of features to file indexes
       unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
       if(!numVectors){
-	if(verbosity)
+	if(verbosity) {
 	  cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl;
+        }
       }
       else{
 	if(usingTimes){
@@ -880,9 +819,10 @@
 			 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
     error("mmap error for creating database", "", "mmap");

-  if(verbosity)
+  if(verbosity) {
     cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors "
 	 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl;
+  }

   // Report status
   status(dbName);
@@ -890,16 +830,25 @@
   munmap(db,O2_DEFAULTDBSIZE);
 }

+// FIXME: this can't propagate the sequence length argument (used for
+// dudCount).  See adb__status() definition for the other half of
+// this.  -- CSR, 2007-10-01
 void audioDB::ws_status(const char*dbName, char* hostport){
   struct soap soap;
-  int adbStatusResult;
+  adb__statusResult adbStatusResult;

   // Query an existing adb database
   soap_init(&soap);
-  if(soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResult)==SOAP_OK)
-    std::cout << "result = " << adbStatusResult << std::endl;
-  else
+  if(soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResult)==SOAP_OK) {
+    cout << "numFiles = " << adbStatusResult.numFiles << endl;
+    cout << "dim = " << adbStatusResult.dim << endl;
+    cout << "length = " << adbStatusResult.length << endl;
+    cout << "dudCount = " << adbStatusResult.dudCount << endl;
+    cout << "nullCount = " << adbStatusResult.nullCount << endl;
+    cout << "flags = " << adbStatusResult.flags << endl;
+  } else {
     soap_print_fault(&soap,stderr);
+  }

   soap_destroy(&soap);
   soap_end(&soap);
@@ -929,21 +878,9 @@
 }


-void audioDB::status(const char* dbName){
+void audioDB::status(const char* dbName, adb__statusResult *adbStatusResult){
   if(!dbH)
     initTables(dbName, 0, 0);
-
-  // Update Header information
-  cout << "num files:" << dbH->numFiles << endl;
-  cout << "data dim:" << dbH->dim <<endl;
-  if(dbH->dim>0){
-    cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl;
-    cout << "vectors available:" << (timesTableOffset-(dataoffset+dbH->length))/(sizeof(double)*dbH->dim) << endl;
-  }
-  cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(timesTableOffset-dataoffset) << "%)" << endl;
-  cout << "bytes available:" << timesTableOffset-(dataoffset+dbH->length) << " (" <<
-    (100.0*(timesTableOffset-(dataoffset+dbH->length)))/(timesTableOffset-dataoffset) << "%)" << endl;
-  cout << "flags:" << dbH->flags << endl;

   unsigned dudCount=0;
   unsigned nullCount=0;
@@ -951,13 +888,35 @@
     if(trackTable[k]<sequenceLength){
       dudCount++;
       if(!trackTable[k])
-	nullCount++;
+        nullCount++;
     }
   }
-  cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl;
+
+  if(adbStatusResult == 0) {
+
+    // Update Header information
+    cout << "num files:" << dbH->numFiles << endl;
+    cout << "data dim:" << dbH->dim <<endl;
+    if(dbH->dim>0){
+      cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl;
+      cout << "vectors available:" << (timesTableOffset-(dataoffset+dbH->length))/(sizeof(double)*dbH->dim) << endl;
+    }
+    cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(timesTableOffset-dataoffset) << "%)" << endl;
+    cout << "bytes available:" << timesTableOffset-(dataoffset+dbH->length) << " (" <<
+      (100.0*(timesTableOffset-(dataoffset+dbH->length)))/(timesTableOffset-dataoffset) << "%)" << endl;
+    cout << "flags:" << dbH->flags << endl;
+
+    cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl;
+  } else {
+    adbStatusResult->numFiles = dbH->numFiles;
+    adbStatusResult->dim = dbH->dim;
+    adbStatusResult->length = dbH->length;
+    adbStatusResult->dudCount = dudCount;
+    adbStatusResult->nullCount = nullCount;
+    adbStatusResult->flags = dbH->flags;
+  }
 }

-
 void audioDB::dump(const char* dbName){
   if(!dbH)
     initTables(dbName, 0, 0);
@@ -971,7 +930,7 @@
 }

 void audioDB::l2norm(const char* dbName){
-  initTables(dbName, 0, 0);
+  initTables(dbName, true, 0);
   if(dbH->length>0){
     unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
     unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append
@@ -1041,7 +1000,7 @@
   unsigned qIndexes[pointNN];
   unsigned sIndexes[pointNN];
   for(unsigned k=0; k<pointNN; k++){
-    distances[k]=0.0;
+    distances[k]=-DBL_MAX;
     qIndexes[k]=~0;
     sIndexes[k]=~0;
   }
@@ -1083,8 +1042,9 @@
     if(queryPoint>numVectors-1)
       error("queryPoint > numVectors in query");
     else{
-      if(verbosity>1)
+      if(verbosity>1) {
 	cerr << "query point: " << queryPoint << endl; cerr.flush();
+      }
       query=query+queryPoint*dbH->dim;
       numVectors=queryPoint+1;
       j=1;
@@ -1129,8 +1089,9 @@
   }

   gettimeofday(&tv2, NULL);
-  if(verbosity>1)
+  if(verbosity>1) {
     cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
+  }

   if(adbQueryResult==0){
     // Output answer
@@ -1158,7 +1119,7 @@
     adbQueryResult->Dist = new double[listLen];
     adbQueryResult->Qpos = new int[listLen];
     adbQueryResult->Spos = new int[listLen];
-    for(k=0; k<adbQueryResult->__sizeRlist; k++){
+    for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){
       adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
       adbQueryResult->Dist[k]=distances[k];
       adbQueryResult->Qpos[k]=qIndexes[k];
@@ -1193,8 +1154,6 @@

   // For each input vector, find the closest pointNN matching output vectors and report
   unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
-  unsigned numTracks = dbH->numFiles;
-
   double* query = (double*)(indata+sizeof(int));
   double* data = dataBuf;
   double* queryCopy = 0;
@@ -1227,13 +1186,13 @@
   double thisDist;

   for(k=0; k<pointNN; k++){
-    distances[k]=0.0;
+    distances[k]=-DBL_MAX;
     qIndexes[k]=~0;
     sIndexes[k]=~0;
   }

   for(k=0; k<trackNN; k++){
-    trackDistances[k]=0.0;
+    trackDistances[k]=-DBL_MAX;
     trackQIndexes[k]=~0;
     trackSIndexes[k]=~0;
     trackIDs[k]=~0;
@@ -1273,8 +1232,9 @@
     if(queryPoint>numVectors-1)
       error("queryPoint > numVectors in query");
     else{
-      if(verbosity>1)
+      if(verbosity>1) {
 	cerr << "query point: " << queryPoint << endl; cerr.flush();
+      }
       query=query+queryPoint*dbH->dim;
       numVectors=queryPoint+1;
     }
@@ -1303,8 +1263,9 @@
     }
     trackOffset=trackOffsetTable[track];     // numDoubles offset
     trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
-    if(verbosity>7)
+    if(verbosity>7) {
       cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush();
+    }

     if(dbH->flags & O2_FLAG_L2NORM)
       usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy;
@@ -1353,16 +1314,18 @@
     // Take the average of this track's distance
     // Test the track distances
     thisDist=0;
-    n=pointNN;
-    while(n--)
-      thisDist+=distances[pointNN-n-1];
-    thisDist/=pointNN;
+    for (n = 0; n < pointNN; n++) {
+      if (distances[n] == -DBL_MAX) break;
+      thisDist += distances[n];
+    }
+    thisDist /= n;
+
     n=trackNN;
     while(n--){
       if(thisDist>=trackDistances[n]){
 	if((n==0 || thisDist<=trackDistances[n-1])){
 	  // Copy all values above up the queue
-	  for( l=pointNN-1 ; l > n ; l--){
+	  for( l=trackNN-1 ; l > n ; l--){
 	    trackDistances[l]=trackDistances[l-1];
 	    trackQIndexes[l]=trackQIndexes[l-1];
 	    trackSIndexes[l]=trackSIndexes[l-1];
@@ -1379,20 +1342,22 @@
 	break;
     }
     for(unsigned k=0; k<pointNN; k++){
-      distances[k]=0.0;
+      distances[k]=-DBL_MAX;
       qIndexes[k]=~0;
       sIndexes[k]=~0;
     }
   } // tracks
   gettimeofday(&tv2, NULL);

-  if(verbosity>1)
+  if(verbosity>1) {
     cerr << endl << "processed tracks :" << processedTracks
 	 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
+  }

   if(adbQueryResult==0){
-    if(verbosity>1)
+    if(verbosity>1) {
       cerr<<endl;
+    }
     // Output answer
     // Loop over nearest neighbours
     for(k=0; k < min(trackNN,processedTracks); k++)
@@ -1409,7 +1374,7 @@
     adbQueryResult->Dist = new double[listLen];
     adbQueryResult->Qpos = new int[listLen];
     adbQueryResult->Spos = new int[listLen];
-    for(k=0; k<adbQueryResult->__sizeRlist; k++){
+    for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){
       adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
       adbQueryResult->Dist[k]=trackDistances[k];
       adbQueryResult->Qpos[k]=trackQIndexes[k];
@@ -1445,10 +1410,7 @@
   // For each input vector, find the closest pointNN matching output vectors and report
   // we use stdout in this stub version
   unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
-  unsigned numTracks = dbH->numFiles;
-
   double* query = (double*)(indata+sizeof(int));
-  double* data = dataBuf;
   double* queryCopy = 0;

   double qMeanL2;
@@ -1459,10 +1421,14 @@
   double DIFF_THRESH=0;

   if(!(dbH->flags & O2_FLAG_L2NORM) )
-    error("Database must be L2 normed for sequence query","use -l2norm");
+    error("Database must be L2 normed for sequence query","use -L2NORM");
+
+  if(numVectors<sequenceLength)
+    error("Query shorter than requested sequence length", "maybe use -l");

-  if(verbosity>1)
+  if(verbosity>1) {
     cerr << "performing norms ... "; cerr.flush();
+  }
   unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);

   // Make a copy of the query
@@ -1517,28 +1483,29 @@
   unsigned processedTracks=0;
   for(i=0; i<dbH->numFiles; i++){
     if(trackTable[i]>sequenceLength-1){
-      w = trackTable[i]-sequenceLength;
+      w = trackTable[i]-sequenceLength+1;
       pn = sMeanL2+i;
       *pn=0;
       while(w--)
 	if(*ps>0)
 	  *pn+=*ps++;
-      *pn/=trackTable[i]-sequenceLength;
+      *pn/=trackTable[i]-sequenceLength+1;
       SILENCE_THRESH+=*pn;
       processedTracks++;
     }
     ps = sNorm + trackTable[i];
   }
-  if(verbosity>1)
+  if(verbosity>1) {
     cerr << "processedTracks: " << processedTracks << endl;
-
+  }

   SILENCE_THRESH/=processedTracks;
   USE_THRESH=1; // Turn thresholding on
   DIFF_THRESH=SILENCE_THRESH; //  mean shingle power
   SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE
-  if(verbosity>4)
+  if(verbosity>4) {
     cerr << "silence thresh: " << SILENCE_THRESH;
+  }
   w=sequenceLength-1;
   i=1;
   tmp1=*qNorm;
@@ -1561,12 +1528,13 @@
   }
   qMeanL2 /= numVectors-sequenceLength+1;

-  if(verbosity>1)
-    cerr << "done." << endl;
+  if(verbosity>1) {
+    cerr << "done." << endl;
+  }

-
-  if(verbosity>1)
+  if(verbosity>1) {
     cerr << "matching tracks..." << endl;
+  }

   assert(pointNN>0 && pointNN<=O2_MAXNN);
   assert(trackNN>0 && trackNN<=O2_MAXNN);
@@ -1584,7 +1552,6 @@

   unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
   double thisDist;
-  double oneOverWL=1.0/wL;

   for(k=0; k<pointNN; k++){
     distances[k]=1.0e6;
@@ -1622,8 +1589,9 @@
       meanQdur+=timesdata[k];
     }
     meanQdur/=k;
-    if(verbosity>1)
+    if(verbosity>1) {
       cerr << "mean query file duration: " << meanQdur << endl;
+    }
     meanDBdur = new double[dbH->numFiles];
     assert(meanDBdur);
     for(k=0; k<dbH->numFiles; k++){
@@ -1638,8 +1606,9 @@
     if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
       error("queryPoint > numVectors-wL+1 in query");
     else{
-      if(verbosity>1)
+      if(verbosity>1) {
 	cerr << "query point: " << queryPoint << endl; cerr.flush();
+      }
       query=query+queryPoint*dbH->dim;
       qNorm=qNorm+queryPoint;
       numVectors=wL;
@@ -1660,7 +1629,6 @@
   double* qp;
   double* sp;
   double* dp;
-  double diffL2;

   // build track offset table
   unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
@@ -1696,10 +1664,11 @@
     trackOffset=trackOffsetTable[track];     // numDoubles offset
     trackIndexOffset=trackOffset/dbH->dim; // numVectors offset

-    if(sequenceLength<trackTable[track]){  // test for short sequences
+    if(sequenceLength<=trackTable[track]){  // test for short sequences

-      if(verbosity>7)
+      if(verbosity>7) {
 	cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();
+      }

       // Sum products matrix
       for(j=0; j<numVectors;j++){
@@ -1714,7 +1683,6 @@
 	assert(DD[j]);
       }

-      double tmp;
       // Dot product
       for(j=0; j<numVectors; j++)
 	for(k=0; k<trackTable[track]; k++){
@@ -1755,7 +1723,7 @@
 	  }
       }

-      if(verbosity>3 && usingTimes){
+      if(verbosity>3 && usingTimes) {
 	cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl;
 	cerr.flush();
       }
@@ -1764,17 +1732,18 @@
 	 (usingTimes
 	  && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){

-	if(verbosity>3 && usingTimes){
+	if(verbosity>3 && usingTimes) {
 	  cerr << "within duration tolerance." << endl;
 	  cerr.flush();
 	}

 	// Search for minimum distance by shingles (concatenated vectors)
-	for(j=0;j<numVectors-wL;j+=HOP_SIZE)
-	  for(k=0;k<trackTable[track]-wL;k+=HOP_SIZE){
+	for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
+	  for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
 	    thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
-	    if(verbosity>10)
+	    if(verbosity>10) {
 	      cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl;
+            }
 	    // Gather chi^2 statistics
 	    if(thisDist<minSample)
 	      minSample=thisDist;
@@ -1820,13 +1789,16 @@
 	  }
 	// Calculate the mean of the N-Best matches
 	thisDist=0.0;
-	for(m=0; m<pointNN; m++)
+	for(m=0; m<pointNN; m++) {
+          if (distances[m] == 1000000.0) break;
 	  thisDist+=distances[m];
-	thisDist/=pointNN;
+        }
+	thisDist/=m;

 	// Let's see the distances then...
-	if(verbosity>3)
+	if(verbosity>3) {
 	  cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl;
+        }


 	// All the track stuff goes here
@@ -1874,15 +1846,16 @@
   }

   gettimeofday(&tv2,NULL);
-  if(verbosity>1){
+  if(verbosity>1) {
     cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
 	 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
     cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
 	 << " minSample: " << minSample << " maxSample: " << maxSample << endl;
   }
   if(adbQueryResult==0){
-    if(verbosity>1)
+    if(verbosity>1) {
       cerr<<endl;
+    }
     // Output answer
     // Loop over nearest neighbours
     for(k=0; k < min(trackNN,successfulTracks); k++)
@@ -1899,7 +1872,7 @@
     adbQueryResult->Dist = new double[listLen];
     adbQueryResult->Qpos = new int[listLen];
     adbQueryResult->Spos = new int[listLen];
-    for(k=0; k<adbQueryResult->__sizeRlist; k++){
+    for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){
       adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
       adbQueryResult->Dist[k]=trackDistances[k];
       adbQueryResult->Qpos[k]=trackQIndexes[k];
@@ -1939,10 +1912,7 @@
   // For each input vector, find the closest pointNN matching output vectors and report
   // we use stdout in this stub version
   unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
-  unsigned numTracks = dbH->numFiles;
-
   double* query = (double*)(indata+sizeof(int));
-  double* data = dataBuf;
   double* queryCopy = 0;

   double qMeanL2;
@@ -1955,8 +1925,9 @@
   if(!(dbH->flags & O2_FLAG_L2NORM) )
     error("Database must be L2 normed for sequence query","use -l2norm");

-  if(verbosity>1)
+  if(verbosity>1) {
     cerr << "performing norms ... "; cerr.flush();
+  }
   unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);

   // Make a copy of the query
@@ -2011,28 +1982,29 @@
   unsigned processedTracks=0;
   for(i=0; i<dbH->numFiles; i++){
     if(trackTable[i]>sequenceLength-1){
-      w = trackTable[i]-sequenceLength;
+      w = trackTable[i]-sequenceLength+1;
       pn = sMeanL2+i;
       *pn=0;
       while(w--)
 	if(*ps>0)
 	  *pn+=*ps++;
-      *pn/=trackTable[i]-sequenceLength;
+      *pn/=trackTable[i]-sequenceLength+1;
       SILENCE_THRESH+=*pn;
       processedTracks++;
     }
     ps = sNorm + trackTable[i];
   }
-  if(verbosity>1)
+  if(verbosity>1) {
     cerr << "processedTracks: " << processedTracks << endl;
-
+  }

   SILENCE_THRESH/=processedTracks;
   USE_THRESH=1; // Turn thresholding on
   DIFF_THRESH=SILENCE_THRESH; //  mean shingle power
   SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE
-  if(verbosity>4)
+  if(verbosity>4) {
     cerr << "silence thresh: " << SILENCE_THRESH;
+  }
   w=sequenceLength-1;
   i=1;
   tmp1=*qNorm;
@@ -2055,12 +2027,13 @@
   }
   qMeanL2 /= numVectors-sequenceLength+1;

-  if(verbosity>1)
+  if(verbosity>1) {
     cerr << "done." << endl;
+  }

-
-  if(verbosity>1)
+  if(verbosity>1) {
     cerr << "matching tracks..." << endl;
+  }

   assert(pointNN>0 && pointNN<=O2_MAXNN);
   assert(trackNN>0 && trackNN<=O2_MAXNN);
@@ -2076,9 +2049,8 @@
   unsigned sIndexes[pointNN];


-  unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
+  unsigned k,l,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
   double thisDist;
-  double oneOverWL=1.0/wL;

   for(k=0; k<pointNN; k++){
     distances[k]=0.0;
@@ -2116,8 +2088,9 @@
       meanQdur+=timesdata[k];
     }
     meanQdur/=k;
-    if(verbosity>1)
+    if(verbosity>1) {
       cerr << "mean query file duration: " << meanQdur << endl;
+    }
     meanDBdur = new double[dbH->numFiles];
     assert(meanDBdur);
     for(k=0; k<dbH->numFiles; k++){
@@ -2132,8 +2105,9 @@
     if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
       error("queryPoint > numVectors-wL+1 in query");
     else{
-      if(verbosity>1)
+      if(verbosity>1) {
 	cerr << "query point: " << queryPoint << endl; cerr.flush();
+      }
       query=query+queryPoint*dbH->dim;
       qNorm=qNorm+queryPoint;
       numVectors=wL;
@@ -2154,7 +2128,6 @@
   double* qp;
   double* sp;
   double* dp;
-  double diffL2;

   // build track offset table
   unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
@@ -2190,11 +2163,12 @@
     trackOffset=trackOffsetTable[track];     // numDoubles offset
     trackIndexOffset=trackOffset/dbH->dim; // numVectors offset

-    if(sequenceLength<trackTable[track]){  // test for short sequences
+    if(sequenceLength<=trackTable[track]){  // test for short sequences

-      if(verbosity>7)
+      if(verbosity>7) {
 	cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();
-
+      }
+
       // Sum products matrix
       for(j=0; j<numVectors;j++){
 	D[j]=new double[trackTable[track]];
@@ -2208,7 +2182,6 @@
 	assert(DD[j]);
       }

-      double tmp;
       // Dot product
       for(j=0; j<numVectors; j++)
 	for(k=0; k<trackTable[track]; k++){
@@ -2249,7 +2222,7 @@
 	  }
       }

-      if(verbosity>3 && usingTimes){
+      if(verbosity>3 && usingTimes) {
 	cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl;
 	cerr.flush();
       }
@@ -2258,17 +2231,18 @@
 	 (usingTimes
 	  && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){

-	if(verbosity>3 && usingTimes){
+	if(verbosity>3 && usingTimes) {
 	  cerr << "within duration tolerance." << endl;
 	  cerr.flush();
 	}

 	// Search for minimum distance by shingles (concatenated vectors)
-	for(j=0;j<numVectors-wL;j+=HOP_SIZE)
-	  for(k=0;k<trackTable[track]-wL;k+=HOP_SIZE){
+	for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
+	  for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
 	    thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
-	    if(verbosity>10)
+	    if(verbosity>10) {
 	      cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl;
+            }
 	    // Gather chi^2 statistics
 	    if(thisDist<minSample)
 	      minSample=thisDist;
@@ -2299,8 +2273,9 @@
 	thisDist=distances[0];

 	// Let's see the distances then...
-	if(verbosity>3)
+	if(verbosity>3) {
 	  cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl;
+        }

 	// All the track stuff goes here
 	n=trackNN;
@@ -2347,7 +2322,7 @@
   }

   gettimeofday(&tv2,NULL);
-  if(verbosity>1){
+  if(verbosity>1) {
     cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
 	 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
     cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
@@ -2355,8 +2330,9 @@
   }

   if(adbQueryResult==0){
-    if(verbosity>1)
+    if(verbosity>1) {
       cerr<<endl;
+    }
     // Output answer
     // Loop over nearest neighbours
     for(k=0; k < min(trackNN,successfulTracks); k++)
@@ -2372,7 +2348,7 @@
     adbQueryResult->Dist = new double[listLen];
     adbQueryResult->Qpos = new int[listLen];
     adbQueryResult->Spos = new int[listLen];
-    for(k=0; k<adbQueryResult->__sizeRlist; k++){
+    for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){
       adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
       adbQueryResult->Dist[k]=trackDistances[k];
       adbQueryResult->Qpos[k]=trackQIndexes[k];
@@ -2401,47 +2377,13 @@

 }

-void audioDB::normalize(double* X, int dim, int n){
-  unsigned c = n*dim;
-  double minval,maxval,v,*p;
-
-  p=X;
-  while(c--){
-    v=*p++;
-    if(v<minval)
-      minval=v;
-    else if(v>maxval)
-      maxval=v;
-  }
-
-  normalize(X, dim, n, minval, maxval);
-
-}
-
-void audioDB::normalize(double* X, int dim, int n, double minval, double maxval){
-  unsigned c = n*dim;
-  double *p;
-
-
-  if(maxval==minval)
-    return;
-
-  maxval=1.0/(maxval-minval);
-  c=n*dim;
-  p=X;
-
-  while(c--){
-    *p=(*p-minval)*maxval;
-    p++;
-  }
-}
-
 // Unit norm block of features
 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
   unsigned d;
-  double L2, oneOverL2, *p;
-  if(verbosity>2)
+  double L2, *p;
+  if(verbosity>2) {
     cerr << "norming " << n << " vectors...";cerr.flush();
+  }
   while(n--){
     p=X;
     L2=0.0;
@@ -2462,14 +2404,15 @@
     */
     X+=dim;
   }
-  if(verbosity>2)
+  if(verbosity>2) {
     cerr << "done..." << endl;
+  }
 }

 // Unit norm block of features
 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){
   unsigned d;
-  double L2, oneOverL2, *p;
+  double *p;
   unsigned nn = n;

   assert(l2normTable);
@@ -2477,8 +2420,9 @@
   if( !append && (dbH->flags & O2_FLAG_L2NORM) )
     error("Database is already L2 normed", "automatic norm on insert is enabled");

-  if(verbosity>2)
+  if(verbosity>2) {
     cerr << "norming " << n << " vectors...";cerr.flush();
+  }

   double* l2buf = new double[n];
   double* l2ptr = l2buf;
@@ -2512,8 +2456,9 @@
   memcpy(l2normTable+offset, l2buf, n*sizeof(double));
   if(l2buf)
     delete[] l2buf;
-  if(verbosity>2)
+  if(verbosity>2) {
     cerr << "done..." << endl;
+  }
 }


@@ -2536,7 +2481,7 @@
 	      soap_print_fault(&soap, stderr);
 	      break;
 	    }
-	  fprintf(stderr, "%d: accepted connection from IP=%d.%d.%d.%d socket=%d\n", i,
+	  fprintf(stderr, "%d: accepted connection from IP=%lu.%lu.%lu.%lu socket=%d\n", i,
 		  (soap.ip >> 24)&0xFF, (soap.ip >> 16)&0xFF, (soap.ip >> 8)&0xFF, soap.ip&0xFF, s);
 	  if (soap_serve(&soap) != SOAP_OK) // process RPC request
 	    soap_print_fault(&soap, stderr); // print error
@@ -2552,12 +2497,16 @@
 // web services

 // SERVER SIDE
-int adb__status(struct soap* soap, xsd__string dbName, xsd__int &adbCreateResult){
+int adb__status(struct soap* soap, xsd__string dbName, adb__statusResult &adbStatusResult){
   char* const argv[]={"audioDB",COM_STATUS,"-d",dbName};
   const unsigned argc = 4;
-  audioDB(argc,argv);
-  adbCreateResult=100;
-  return SOAP_OK;
+  try {
+    audioDB(argc, argv, &adbStatusResult);
+    return SOAP_OK;
+  } catch(char *err) {
+    soap_receiver_fault(soap, err, "");
+    return SOAP_FAULT;
+  }
 }

 // Literal translation of command line to web service
--- a/audioDB.h	Wed Aug 29 16:24:29 2007 +0000
+++ b/audioDB.h	Mon Oct 01 14:40:08 2007 +0000
@@ -1,75 +1,3 @@
-/* audioDB.h
-
-audioDB version 1.0
-
-An efficient feature-vector database management system (FVDBMS) for
-content-based multimedia search and retrieval.
-
-Usage: audioDB [OPTIONS]...
-
-      --full-help              Print help, including hidden options, and exit
-  -V, --version                Print version and exit
-  -H, --help                   print help on audioDB usage and exit.
-
-Database Setup:
-  These commands require a database argument.
-  -d, --database=filename      database name to be used with database commands
-  -N, --new                    make a new database
-  -S, --status                 database information
-  -D, --dump                   list all tracks: index key size
-
-Database Insertion:
-  The following commands process a binary input feature file and optional
-  associated key.
-  -I, --insert                 add feature vectors to an existing database
-  -f, --features=filename      binary series of vectors file
-  -t, --times=filename         list of time points (ascii) for feature vectors
-  -k, --key=identifier         unique identifier associated with features
-
-Batch Commands:
-  These batch commands require a list of feature vector filenames in a text
-  file and optional list of keys in a text file.
-  -B, --batchinsert            add feature vectors named in a featureList file
-                                 (with optional keys in a keyList file) to the
-                                 named database
-  -F, --featureList=filename   text file containing list of binary feature
-                                 vector files to process
-  -T, --timesList=filename     text file containing list of ascii time-point
-                                 files for each feature vector file named in
-                                 featureList
-  -K, --keyList=filename       text file containing list of unique identifiers
-                                 to associate with list of feature files
-
-Database Search:
-  Thse commands control the behaviour of retrieval from a named database.
-  -Q, --query                  perform a content-based search on the named
-                                 database using the named feature vector file
-                                 as a query
-  -q, --qtype=type             the type of search  (possible values="point",
-                                 "track", "sequence" default=`sequence')
-  -p, --qpoint=position        ordinal position of query vector (or start of
-                                 sequence) in feature vector input file
-                                 (default=`0')
-  -n, --pointnn=numpoints      number of point nearest neighbours to use [per
-                                 track in track and sequence mode]
-                                 (default=`10')
-  -r, --resultlength=length    maximum length of the result list
-                                 (default=`10')
-  -l, --sequencelength=length  length of sequences for sequence search
-                                 (default=`16')
-  -h, --sequencehop=hop        hop size of sequence window for sequence search
-                                 (default=`1')
-
-Web Services:
-  These commands enable the database process to establish a connection via the
-  internet and operate as separate client and server processes.
-  -s, --server=port            run as standalone web service on named port
-                                 (default=`80011')
-  -c, --client=hostname:port   run as a client using named host service
-
-*/
-
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/types.h>
@@ -82,6 +10,7 @@
 #include <math.h>
 #include <sys/time.h>
 #include <assert.h>
+#include <float.h>

 // includes for web services
 #include "soapH.h"
@@ -224,13 +153,13 @@
   void initTables(const char* dbName, bool forWrite, const char* inFile);
   void unitNorm(double* X, unsigned d, unsigned n, double* qNorm);
   void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append);
-  void normalize(double* X, int dim, int n);
-  void normalize(double* X, int dim, int n, double minval, double maxval);
   void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata);
   unsigned getKeyPos(char* key);
  public:

-  audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult=0);
+  audioDB(const unsigned argc, char* const argv[]);
+  audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult);
+  audioDB(const unsigned argc, char* const argv[], adb__statusResult *adbStatusResult);
   ~audioDB();
   int processArgs(const unsigned argc, char* const argv[]);
   void get_lock(int fd, bool exclusive);
@@ -240,7 +169,7 @@
   void insert(const char* dbName, const char* inFile);
   void batchinsert(const char* dbName, const char* inFile);
   void query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0);
-  void status(const char* dbName);
+  void status(const char* dbName, adb__statusResult *adbStatusResult=0);
   void ws_status(const char*dbName, char* hostport);
   void ws_query(const char*dbName, const char *trackKey, const char* hostport);
   void l2norm(const char* dbName);
--- a/audioDBws.h	Wed Aug 29 16:24:29 2007 +0000
+++ b/audioDBws.h	Mon Oct 01 14:40:08 2007 +0000
@@ -23,8 +23,17 @@
   int *Spos;
 };

+class adb__statusResult {
+  unsigned numFiles;
+  unsigned dim;
+  unsigned length;
+  unsigned dudCount;
+  unsigned nullCount;
+  unsigned flags;
+};
+
 // Print the status of an existing adb database
-int adb__status(xsd__string dbName, xsd__int &adbCreateResult);
+int adb__status(xsd__string dbName, adb__statusResult &adbStatusResult);

 // Query an existing adb database
 int adb__query(xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int segNN, xsd__int segLen, adb__queryResult &adbQueryResult);
--- a/debian/changelog	Wed Aug 29 16:24:29 2007 +0000
+++ b/debian/changelog	Mon Oct 01 14:40:08 2007 +0000
@@ -1,3 +1,9 @@
+audiodb (1.0-7) unstable; urgency=low
+
+  * updated to svn version #93
+
+ -- Christophe Rhodes <c.rhodes@gold.ac.uk>  Mon,  1 Oct 2007 16:35:53 +0100
+
 audiodb (1.0-6) unstable; urgency=low

   * updated to svn version #51
--- a/gengetopt.in	Wed Aug 29 16:24:29 2007 +0000
+++ b/gengetopt.in	Mon Oct 01 14:40:08 2007 +0000
@@ -73,7 +73,7 @@
 option "sequencehop"  h "hop size of sequence window for sequence search." int typestr="hop" default="1" dependon="QUERY" optional

 section "Web Services" sectiondesc="These commands enable the database process to establish a connection via the internet and operate as separate client and server processes.\n"
-option "SERVER" s "run as standalone web service on named port." int typestr="port" default="80011" optional
+option "SERVER" s "run as standalone web service on named port." int typestr="port" default="14475" optional
 option "client" c "run as a client using named host service." string typestr="hostname:port" optional
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0001/run-test.sh	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,18 @@
+#! /bin/sh
+# Test 0001: database creation (-N) and its failure modes.
+. ../test-utils.sh
+
+if [ -f testdb ]; then rm -f testdb; fi
+
+# creation: -N on a fresh name must succeed
+${AUDIODB} -N -d testdb
+# the database file must exist afterwards
+stat testdb
+
+# should fail (testdb exists)
+expect_clean_error_exit ${AUDIODB} -N -d testdb
+
+# should fail (no db given)
+expect_clean_error_exit ${AUDIODB} -N
+# 104 is the exit status run-tests.sh treats as success
+exit 104
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0001/short-description	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,1 @@
+DB creation
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0002/run-test.sh	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,17 @@
+#! /bin/sh
+# Test 0002: status (-S) of a freshly created database.
+. ../test-utils.sh
+
+if [ -f testdb ]; then rm -f testdb; fi
+
+${AUDIODB} -N -d testdb
+
+# Both argument orders must be accepted.  FIXME: at some point we will
+# want to test that some relevant information is being printed
+${AUDIODB} -S -d testdb
+${AUDIODB} -d testdb -S
+
+# should fail (no db given)
+expect_clean_error_exit ${AUDIODB} -S
+# 104 is the exit status run-tests.sh treats as success
+exit 104
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0002/short-description	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,1 @@
+DB status
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0003/run-test.sh	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,32 @@
+#! /bin/sh
+# Test 0003: insert a single 1D feature, then point-query the database with it.
+. ../test-utils.sh
+
+if [ -f testdb ]; then rm -f testdb; fi
+
+${AUDIODB} -d testdb -N
+
+# We could contemplate putting the test feature (and the expected
+# query output) under svn control if we trust its binary file
+# handling.
+
+# FIXME: endianness!  (intstring/floatstring emit fixed byte sequences)
+intstring 1 > testfeature
+floatstring 1 >> testfeature
+
+${AUDIODB} -d testdb -I -f testfeature
+# query with the inserted feature itself and compare with the expected line
+${AUDIODB} -d testdb -Q point -f testfeature > test-query-output
+
+echo testfeature 1 0 0 > test-expected-query-output
+
+cmp test-query-output test-expected-query-output
+
+# failure cases: missing or invalid arguments must give a clean error exit
+expect_clean_error_exit ${AUDIODB} -d testdb -I
+expect_clean_error_exit ${AUDIODB} -d testdb -f testfeature
+expect_clean_error_exit ${AUDIODB} -I -f testfeature
+expect_clean_error_exit ${AUDIODB} -d testdb -Q notpoint -f testfeature
+expect_clean_error_exit ${AUDIODB} -Q point -f testfeature
+# 104 is the exit status run-tests.sh treats as success
+exit 104
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0003/short-description	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,1 @@
+1D insertion / point query
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0004/run-test.sh	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,39 @@
+#! /bin/sh
+# Test 0004: point queries against one feature file holding (0,1) and (1,0).
+. ../test-utils.sh
+
+if [ -f testdb ]; then rm -f testdb; fi
+
+${AUDIODB} -d testdb -N
+# feature file: header int 2 (presumably the dimension), then two vectors
+intstring 2 > testfeature
+floatstring 0 1 >> testfeature
+floatstring 1 0 >> testfeature
+
+${AUDIODB} -d testdb -I -f testfeature
+
+echo "query point (0.0,0.5)"
+intstring 2 > testquery
+floatstring 0 0.5 >> testquery
+# default result list first, then the same query with -n 1
+${AUDIODB} -d testdb -Q point -f testquery > testoutput
+echo testfeature 0.5 0 0 > test-expected-output
+echo testfeature 0 0 1 >> test-expected-output
+cmp testoutput test-expected-output
+${AUDIODB} -d testdb -Q point -f testquery -n 1 > testoutput
+echo testfeature 0.5 0 0 > test-expected-output
+cmp testoutput test-expected-output
+
+echo "query point (0.5,0.0)"
+intstring 2 > testquery
+floatstring 0.5 0 >> testquery
+# same two checks for the second query point
+${AUDIODB} -d testdb -Q point -f testquery > testoutput
+echo testfeature 0.5 0 1 > test-expected-output
+echo testfeature 0 0 0 >> test-expected-output
+cmp testoutput test-expected-output
+${AUDIODB} -d testdb -Q point -f testquery -n 1 > testoutput
+echo testfeature 0.5 0 1 > test-expected-output
+cmp testoutput test-expected-output
+# 104 is the exit status run-tests.sh treats as success
+exit 104
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0004/short-description	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,1 @@
+point query
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0005/run-test.sh	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,19 @@
+#! /bin/bash
+# Test 0005: -L (L2 norm) on a database holding one inserted feature file.
+. ../test-utils.sh
+
+if [ -f testdb ]; then rm -f testdb; fi
+
+${AUDIODB} -d testdb -N
+
+intstring 2 > testfeature
+floatstring 0 1 >> testfeature
+floatstring 1 0 >> testfeature
+
+${AUDIODB} -d testdb -I -f testfeature
+
+echo running L2Norm
+# only the exit status of -L is checked here
+${AUDIODB} -d testdb -L
+# 104 is the exit status run-tests.sh treats as success
+exit 104
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0005/short-description	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,1 @@
+L2 Norm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0006/run-test.sh	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,40 @@
+#! /bin/sh
+# Test 0006: sequence queries of length 1 (-l 1) against a single track.
+. ../test-utils.sh
+
+if [ -f testdb ]; then rm -f testdb; fi
+
+${AUDIODB} -d testdb -N
+
+intstring 2 > testfeature
+floatstring 0 1 >> testfeature
+floatstring 1 0 >> testfeature
+
+${AUDIODB} -d testdb -I -f testfeature
+
+# sequence queries require L2NORM
+${AUDIODB} -d testdb -L
+
+echo "query point (0.0,0.5)"
+intstring 2 > testquery
+floatstring 0 0.5 >> testquery
+# default result first, then the same query with -n 1
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery > testoutput
+echo testfeature 1 0 0 > test-expected-output
+cmp testoutput test-expected-output
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -n 1 > testoutput
+echo testfeature 0 0 0 > test-expected-output
+cmp testoutput test-expected-output
+
+echo "query point (0.5,0.0)"
+intstring 2 > testquery
+floatstring 0.5 0 >> testquery
+# same two checks for the second query point
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery > testoutput
+echo testfeature 1 0 1 > test-expected-output
+cmp testoutput test-expected-output
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -n 1 > testoutput
+echo testfeature 0 0 1 > test-expected-output
+cmp testoutput test-expected-output
+# 104 is the exit status run-tests.sh treats as success
+exit 104
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0006/short-description	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,1 @@
+sequence search / 1 track
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0007/run-test.sh	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,35 @@
+#! /bin/sh
+# Test 0007: a sequence query without -l on a short query must fail cleanly.
+. ../test-utils.sh
+
+if [ -f testdb ]; then rm -f testdb; fi
+
+${AUDIODB} -d testdb -N
+
+# tests that the lack of -l when the query sequence is shorter doesn't
+# segfault: every query below must end in a clean error exit instead.
+
+intstring 2 > testfeature
+floatstring 0 1 >> testfeature
+floatstring 1 0 >> testfeature
+
+${AUDIODB} -d testdb -I -f testfeature
+
+# sequence queries require L2NORM
+${AUDIODB} -d testdb -L
+
+echo "query point (0.0,0.5)"
+intstring 2 > testquery
+floatstring 0 0.5 >> testquery
+
+expect_clean_error_exit ${AUDIODB} -d testdb -Q sequence -f testquery
+expect_clean_error_exit ${AUDIODB} -d testdb -Q sequence -f testquery -n 1
+
+echo "query point (0.5,0.0)"
+intstring 2 > testquery
+floatstring 0.5 0 >> testquery
+
+expect_clean_error_exit ${AUDIODB} -d testdb -Q sequence -f testquery
+expect_clean_error_exit ${AUDIODB} -d testdb -Q sequence -f testquery -n 1
+# 104 is the exit status run-tests.sh treats as success
+exit 104
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0007/short-description	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,1 @@
+short query [no -l] error
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0008/run-test.sh	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,44 @@
+#! /bin/sh
+# Test 0008: sequence queries (-l 1) against two single-vector tracks.
+. ../test-utils.sh
+
+if [ -f testdb ]; then rm -f testdb; fi
+
+${AUDIODB} -d testdb -N
+# two feature files, one vector each: (0,1) and (1,0)
+intstring 2 > testfeature01
+floatstring 0 1 >> testfeature01
+intstring 2 > testfeature10
+floatstring 1 0 >> testfeature10
+
+${AUDIODB} -d testdb -I -f testfeature01
+${AUDIODB} -d testdb -I -f testfeature10
+
+# sequence queries require L2NORM
+${AUDIODB} -d testdb -L
+
+echo "query point (0.0,0.5)"
+intstring 2 > testquery
+floatstring 0 0.5 >> testquery
+# both tracks are listed; -r 1 then limits the result list to one entry
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery > testoutput
+echo testfeature01 0 0 0 > test-expected-output
+echo testfeature10 2 0 0 >> test-expected-output
+cmp testoutput test-expected-output
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -r 1 > testoutput
+echo testfeature01 0 0 0 > test-expected-output
+cmp testoutput test-expected-output
+
+echo "query point (0.5,0.0)"
+intstring 2 > testquery
+floatstring 0.5 0 >> testquery
+# same two checks for the second query point
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery > testoutput
+echo testfeature10 0 0 0 > test-expected-output
+echo testfeature01 2 0 0 >> test-expected-output
+cmp testoutput test-expected-output
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -r 1 > testoutput
+echo testfeature10 0 0 0 > test-expected-output
+cmp testoutput test-expected-output
+# 104 is the exit status run-tests.sh treats as success
+exit 104
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0008/short-description	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,1 @@
+sequence search / 2 tracks
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0009/run-test.sh	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,41 @@
+#! /bin/sh
+# Test 0009: track queries against two single-vector tracks.
+. ../test-utils.sh
+
+if [ -f testdb ]; then rm -f testdb; fi
+
+${AUDIODB} -d testdb -N
+# two feature files, one vector each: (0,1) and (1,0)
+intstring 2 > testfeature01
+floatstring 0 1 >> testfeature01
+intstring 2 > testfeature10
+floatstring 1 0 >> testfeature10
+
+${AUDIODB} -d testdb -I -f testfeature01
+${AUDIODB} -d testdb -I -f testfeature10
+# note: unlike the sequence tests, no -L step is run before querying
+echo "query point (0.0,0.5)"
+intstring 2 > testquery
+floatstring 0 0.5 >> testquery
+# both tracks are listed; -r 1 then limits the result list to one entry
+${AUDIODB} -d testdb -Q track -l 1 -f testquery > testoutput
+echo testfeature01 0.5 0 0 > test-expected-output
+echo testfeature10 0 0 0 >> test-expected-output
+cmp testoutput test-expected-output
+${AUDIODB} -d testdb -Q track -l 1 -f testquery -r 1 > testoutput
+echo testfeature01 0.5 0 0 > test-expected-output
+cmp testoutput test-expected-output
+
+echo "query point (0.5,0.0)"
+intstring 2 > testquery
+floatstring 0.5 0 >> testquery
+# same two checks for the second query point
+${AUDIODB} -d testdb -Q track -l 1 -f testquery > testoutput
+echo testfeature10 0.5 0 0 > test-expected-output
+echo testfeature01 0 0 0 >> test-expected-output
+cmp testoutput test-expected-output
+${AUDIODB} -d testdb -Q track -l 1 -f testquery -r 1 > testoutput
+echo testfeature10 0.5 0 0 > test-expected-output
+cmp testoutput test-expected-output
+# 104 is the exit status run-tests.sh treats as success
+exit 104
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0009/short-description	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,1 @@
+track search
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0010/run-test.sh	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,49 @@
+#! /bin/sh
+# Test 0010: sequence queries with a radius (-R 5) on two single-vector tracks.
+. ../test-utils.sh
+
+if [ -f testdb ]; then rm -f testdb; fi
+
+${AUDIODB} -d testdb -N
+# two feature files, one vector each: (0,1) and (1,0)
+intstring 2 > testfeature01
+floatstring 0 1 >> testfeature01
+intstring 2 > testfeature10
+floatstring 1 0 >> testfeature10
+
+${AUDIODB} -d testdb -I -f testfeature01
+${AUDIODB} -d testdb -I -f testfeature10
+
+# sequence queries require L2NORM
+${AUDIODB} -d testdb -L
+
+echo "query point (0.0,0.5)"
+intstring 2 > testquery
+floatstring 0 0.5 >> testquery
+# with -R each expected line is a key plus one number (presumably a match count -- TODO confirm)
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -R 5 > testoutput
+echo testfeature01 1 > test-expected-output
+echo testfeature10 1 >> test-expected-output
+cmp testoutput test-expected-output
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -r 1 -R 5 > testoutput
+echo testfeature01 1 > test-expected-output
+cmp testoutput test-expected-output
+
+echo "query point (0.5,0.0)"
+intstring 2 > testquery
+floatstring 0.5 0 >> testquery
+
+# FIXME: because there's only one point in each track (and the query),
+# the ordering is essentially database order.  We need these test
+# cases anyway because we need to test non-segfaulting, non-empty
+# results...
+
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -R 5 > testoutput
+echo testfeature01 1 > test-expected-output
+echo testfeature10 1 >> test-expected-output
+cmp testoutput test-expected-output
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -r 1 -R 5 > testoutput
+echo testfeature01 1 > test-expected-output
+cmp testoutput test-expected-output
+# 104 is the exit status run-tests.sh treats as success
+exit 104
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0010/short-description	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,1 @@
+sequence radius search / 1 point
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/clean.sh	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,11 @@
+#! /bin/sh
+
+# Remove generated test* files from every numbered test directory.
+for file in [0-9][0-9][0-9][0-9]*; do
+  if [ -d "${file}" ]; then
+    echo "Cleaning ${file}"
+    rm -f "${file}"/test*
+    # optional per-test cleanup hook
+    if [ -f "${file}/clean.sh" ]; then (cd "${file}" && sh ./clean.sh); fi
+  fi
+done
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/run-tests.sh	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,39 @@
+#! /bin/sh
+
+# AUDIODB is the binary's path as seen from inside a test directory (tests/NNNN).
+AUDIODB=../../${EXECUTABLE:-audioDB}
+export AUDIODB
+
+# This script itself runs one level higher, so drop one leading "../" before
+# checking.  ${VAR#prefix} is POSIX; the substring form ${VAR:3} is bash-only.
+if [ ! -x "${AUDIODB#../}" ]; then
+  echo "Cannot execute audioDB: ${AUDIODB#../}"
+  exit 1
+fi
+
+FAILED=
+for file in [0-9][0-9][0-9][0-9]*; do
+  if [ -d "${file}" ]; then
+    if [ -f "${file}/run-test.sh" ]; then
+      printf '%s' "Running test ${file}"
+      if [ -f "${file}/short-description" ]; then
+        awk '{ printf(" (%s)",$0) }' < "${file}/short-description"
+      fi
+      printf ':'
+      (cd "${file}" && sh ./run-test.sh > test.out 2> test.err)
+      EXIT_STATUS=$?
+      # a run-test.sh signals success by exiting with status 104
+      if [ ${EXIT_STATUS} -ne 104 ]; then
+        echo " failed (exit status ${EXIT_STATUS})".
+        FAILED=true
+      else
+        echo " success."
+      fi
+    else
+      echo "Skipping test ${file}"
+    fi
+  fi
+done
+
+# Exit 0 only if no test failed (FAILED stays empty), 1 otherwise.
+[ -z "${FAILED}" ]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-utils.sh	Mon Oct 01 14:40:08 2007 +0000
@@ -0,0 +1,49 @@
+# no shebang line: this file should be sourced by run-test.sh files
+
+# Abort the sourcing test on the first failing command.  "set -e" is POSIX,
+# whereas the bash-only "trap ... ERR" is rejected by shells such as dash.
+set -e
+
+if [ -z "${AUDIODB}" ]; then
+  AUDIODB=../../audioDB
+fi
+
+# Run "$@" and require a clean failure: a non-zero exit status below 126
+# (126 and above mean "could not execute" or "killed by a signal").
+expect_clean_error_exit() {
+  set +e
+  "$@"
+  exit_code=$?
+  set -e
+  if [ $exit_code -eq 0 ] || [ $exit_code -ge 126 ]; then
+    exit 1
+  fi
+}
+
+# Emit each argument as an 8-byte little-endian double, using octal escapes
+# because POSIX printf does not define \x.  FIXME: implement it properly,
+# not just for the few floats we know how to encode; that may need some C
+# code, as the shell doesn't do floating point (scanf() is probably enough).
+floatstring() {
+  for arg in "$@"; do
+    case ${arg} in
+      0)
+        printf "\000\000\000\000\000\000\000\000";;
+      0.5)
+        printf "\000\000\000\000\000\000\340\077";;
+      1)
+        printf "\000\000\000\000\000\000\360\077";;
+      *)
+        echo "bad arg to floatstring(): ${arg}"
+        exit 1;;
+    esac
+  done
+}
+
+# FIXME: likewise.  And endianness issues (which are a reflection of
+# the endianness of audioDB as of 2007-09-18, unfortunately).
+
+intstring() {
+  # works up to 9 for now
+  if [ $1 -ge 10 ]; then echo "intstring() arg too large: ${1}"; exit 1; fi
+  printf "\\$(printf '%03o' "$1")\000\000\000"
+}