view audioDB.cpp @ 53:944f05e65a58

Fix what is I think an off-by-one in query and sequence searching. When taking the mean of the N minimum distances, only consider those distances which are closer than the infinite/uninitialized distance. (This renders us potentially vulnerable to an internal consistency horror where we end up trying to divide by zero. This is unlikely to be picked up by unit tests, but might well be by random tests if this is in fact a problem.)
author mas01cr
date Thu, 20 Sep 2007 08:50:35 +0000
parents 4d9ea08b2f5f
children 32f8b7845b30
line wrap: on
line source
/* audioDB.cpp

audioDB version 1.0

A feature vector database management system for content-based retrieval.

Usage: audioDB [OPTIONS]...

      --full-help              Print help, including hidden options, and exit
  -V, --version                Print version and exit
  -H, --help                   print help on audioDB usage and exit.
  -v, --verbosity=detail       level of detail of operational information.  
                                 (default=`1')

Database Setup:
  All database operations require a database argument.
  
  Database commands are UPPER CASE. Command options are lower case.

  -d, --database=filename      database file required by Database commands.
  -N, --NEW                    make a new (initially empty) database.
  -S, --STATUS                 output database information to stdout.
  -D, --DUMP                   output all entries: index key size.
  -L, --L2NORM                 unit norm vectors and norm all future inserts.

Database Insertion:
  The following commands insert feature files, with optional keys and 
  timestamps.

  -I, --INSERT                 add feature vectors to an existing database.
  -U, --UPDATE                 replace inserted vectors associated with key 
                                 with new input vectors.
  -f, --features=filename      binary series of vectors file {int sz:ieee 
                                 double[][sz]:eof}.
  -t, --times=filename         list of time points (ascii) for feature vectors.
  -k, --key=identifier         unique identifier associated with features.
  
  -B, --BATCHINSERT            add feature vectors named in a --featureList 
                                 file (with optional keys in a --keyList file) 
                                 to the named database.
  -F, --featureList=filename   text file containing list of binary feature 
                                 vector files to process
  -T, --timesList=filename     text file containing list of ascii --times for 
                                 each --features file in --featureList.
  -K, --keyList=filename       text file containing list of unique identifiers 
                                 associated with --features.

Database Search:
  These commands control the retrieval behaviour.

  -Q, --QUERY=searchtype       content-based search on --database using 
                                 --features as a query. Optionally restrict the 
                                 search to those tracks identified in a 
                                 --keyList.  (possible values="point", 
                                 "track", "sequence")
  -p, --qpoint=position        ordinal position of query start point in 
                                 --features file.  (default=`0')
  -e, --exhaustive             exhaustive search: iterate through all query 
                                 vectors in search. Overrides --qpoint.  
                                 (default=off)
  -n, --pointnn=numpoints      number of point nearest neighbours to use in 
                                 retrieval.  (default=`10')
  -R, --radius=DOUBLE          radius search, returns all 
                                 points/tracks/sequences inside given radius. 
                                  (default=`1.0')
  -x, --expandfactor=DOUBLE    time compress/expand factor of result length to 
                                 query length [1.0 .. 100.0].  (default=`1.1')
  -o, --rotate                 rotate query vectors for rotationally invariant 
                                 search.  (default=off)
  -r, --resultlength=length    maximum length of the result list.  
                                 (default=`10')
  -l, --sequencelength=length  length of sequences for sequence search.  
                                 (default=`16')
  -h, --sequencehop=hop        hop size of sequence window for sequence search. 
                                  (default=`1')

Web Services:
  These commands enable the database process to establish a connection via the 
  internet and operate as separate client and server processes.

  -s, --SERVER=port            run as standalone web service on named port.  
                                 (default=`80011')
  -c, --client=hostname:port   run as a client using named host service.
  
  Copyright (C) 2007 Michael Casey, Goldsmiths, University of London
  
  outputs:
  
  key1 distance1 qpos1 spos1
  key2 distance2 qpos2 spos2
  ...
  keyN distanceN qposN sposN
  
*/

#include "audioDB.h"

#define O2_DEBUG

// Report a fatal error and terminate the process.
//   a, b    - message fragments, written to stderr as "a: b"
//   sysFunc - when non-null, handed to perror() so that the text for the
//             current errno is printed under that label
// Does not return: the function always finishes with exit(1).
void audioDB::error(const char* a, const char* b, const char *sysFunc) {
  cerr << a << ": " << b << endl;

  const bool haveSysFunc = (sysFunc != 0);
  if (haveSysFunc)
    perror(sysFunc);

  exit(1);
}

// Construct the application object: set every member to its default, parse
// the command line, then immediately run the single command it selected.
//   argc, argv      - the process command line, forwarded to processArgs()
//   adbQueryResult  - forwarded unchanged to query() for local queries
// If processArgs() reports that no command was found, the version, purpose,
// usage and first three help lines are printed and the process exits with 1.
audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult):
  dim(0),
  dbName(0),
  inFile(0),
  key(0),
  trackFile(0),
  trackFileName(0),
  timesFile(0),
  timesFileName(0),
  usingTimes(0),
  command(0),
  dbfid(0),
  db(0),
  dbH(0),
  infid(0),
  indata(0),
  queryType(O2_FLAG_POINT_QUERY),
  verbosity(1),
  pointNN(O2_DEFAULT_POINTNN),
  trackNN(O2_DEFAULT_TRACKNN),
  trackTable(0),
  fileTable(0),
  dataBuf(0),
  l2normTable(0),
  timesTable(0),
  qNorm(0),
  sequenceLength(16),
  sequenceHop(1),
  queryPoint(0),
  usingQueryPoint(0),
  isClient(0),
  isServer(0),
  port(0),
  timesTol(0.1),
  radius(0){

  const bool commandFound = !(processArgs(argc, argv)<0);
  if(!commandFound){
    printf("No command found.\n");
    cmdline_parser_print_version ();
    if (strlen(gengetopt_args_info_purpose) > 0) {
      printf("%s\n", gengetopt_args_info_purpose);
    }
    printf("%s\n", gengetopt_args_info_usage);
    printf("%s\n", gengetopt_args_info_help[1]);
    printf("%s\n", gengetopt_args_info_help[2]);
    printf("%s\n", gengetopt_args_info_help[0]);
    exit(1);
  }

  // Dispatch on the selected command.  QUERY and STATUS are the only commands
  // that are forwarded to a remote service when running as a client.
  if(O2_ACTION(COM_SERVER)) {
    startServer();
  }
  else if(O2_ACTION(COM_CREATE)) {
    create(dbName);
  }
  else if(O2_ACTION(COM_INSERT)) {
    insert(dbName, inFile);
  }
  else if(O2_ACTION(COM_BATCHINSERT)) {
    batchinsert(dbName, inFile);
  }
  else if(O2_ACTION(COM_QUERY)) {
    if(isClient) {
      ws_query(dbName, inFile, (char*)hostport);
    }
    else {
      query(dbName, inFile, adbQueryResult);
    }
  }
  else if(O2_ACTION(COM_STATUS)) {
    if(isClient) {
      ws_status(dbName,(char*)hostport);
    }
    else {
      status(dbName);
    }
  }
  else if(O2_ACTION(COM_L2NORM)) {
    l2norm(dbName);
  }
  else if(O2_ACTION(COM_DUMP)) {
    dump(dbName);
  }
  else {
    error("Unrecognized command",command);
  }
}

// Release whatever the object still holds: the mapped feature file, the
// mapped database, both file descriptors and the in-memory header copy.
audioDB::~audioDB(){
  // Mappings are only released when the corresponding pointer is non-null.
  if(indata){
    munmap(indata,statbuf.st_size);
  }
  if(db){
    munmap(db,O2_DEFAULTDBSIZE);
  }

  // Descriptors start out as 0 in the constructor, so only positive values
  // are treated as open.
  if(dbfid>0){
    close(dbfid);
  }
  if(infid>0){
    close(infid);
  }

  // delete on a null pointer is a no-op, so no guard is required here.
  delete dbH;
}

// Parse the command line into member state and select the command to run.
//
// cmdline_parser() fills args_info; the relevant options are then copied into
// members (command, dbName, inFile, key, query parameters, ...).
//
// Returns 0 as soon as a command has been recognised and -1 when none of the
// known commands was given.  The function exits the process itself when there
// are too few arguments, when the parser fails, when --help is given and when
// the radius is out of range; other range violations are reported via error().
int audioDB::processArgs(const unsigned argc, char* const argv[]){

  // With no arguments at all, show the short help and leave successfully.
  if(argc<2){
    cmdline_parser_print_version ();
    if (strlen(gengetopt_args_info_purpose) > 0)
      printf("%s\n", gengetopt_args_info_purpose);
    printf("%s\n", gengetopt_args_info_usage);
    printf("%s\n", gengetopt_args_info_help[1]);
    printf("%s\n", gengetopt_args_info_help[2]);
    printf("%s\n", gengetopt_args_info_help[0]);
    exit(0);
  }

  if (cmdline_parser (argc, argv, &args_info) != 0)
    exit(1);

  if(args_info.help_given){
    cmdline_parser_print_help();
    exit(0);
  }

  if(args_info.verbosity_given){
    verbosity=args_info.verbosity_arg;
    if(verbosity<0 || verbosity>10){
      cerr << "Warning: verbosity out of range, setting to 1" << endl;
      verbosity=1;
    }
  }

  if(args_info.radius_given){
    radius=args_info.radius_arg;
    if(radius<=0 || radius>1000000000){
      cerr << "Warning: radius out of range" << endl;
      exit(1);
    }
    else if(verbosity>3)
      cerr << "Setting radius to " << radius << endl;
  }

  if(args_info.SERVER_given){
    command=COM_SERVER;
    port=args_info.SERVER_arg;
    if(port<100 || port > 100000)
      error("port out of range");
    isServer=1;
    return 0;
  }

  // No return on client command, find database command
  if(args_info.client_given){
    command=COM_CLIENT;
    hostport=args_info.client_arg;
    isClient=1;
  }

  if(args_info.NEW_given){
    command=COM_CREATE;
    dbName=args_info.database_arg;
    return 0;
  }

  if(args_info.STATUS_given){
    command=COM_STATUS;
    dbName=args_info.database_arg;
    return 0;
  }

  if(args_info.DUMP_given){
    command=COM_DUMP;
    dbName=args_info.database_arg;
    return 0;
  }

  if(args_info.L2NORM_given){
    command=COM_L2NORM;
    dbName=args_info.database_arg;
    return 0;
  }

  if(args_info.INSERT_given){
    command=COM_INSERT;
    dbName=args_info.database_arg;
    inFile=args_info.features_arg;
    if(args_info.key_given)
      key=args_info.key_arg;
    if(args_info.times_given){
      timesFileName=args_info.times_arg;
      if(strlen(timesFileName)>0){
        // NOTE(review): plain `new` does not return null, so this test cannot
        // detect an unopenable file; insertTimeStamps() checks is_open() later.
        if(!(timesFile = new ifstream(timesFileName,ios::in)))
          error("Could not open times file for reading", timesFileName);
        usingTimes=1;
      }
    }
    return 0;
  }

  if(args_info.BATCHINSERT_given){
    command=COM_BATCHINSERT;
    dbName=args_info.database_arg;
    inFile=args_info.featureList_arg;
    if(args_info.keyList_given)
      key=args_info.keyList_arg; // INCONSISTENT NO CHECK

    /* TO DO: REPLACE WITH
       if(args_info.keyList_given){
       trackFileName=args_info.keyList_arg;
       if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in)))
       error("Could not open keyList file for reading",trackFileName);
       }
       AND UPDATE BATCHINSERT()
    */

    if(args_info.timesList_given){
      timesFileName=args_info.timesList_arg;
      if(strlen(timesFileName)>0){
        // NOTE(review): see the INSERT branch - this null test never fires.
        if(!(timesFile = new ifstream(timesFileName,ios::in)))
          error("Could not open timesList file for reading", timesFileName);
        usingTimes=1;
      }
    }
    return 0;
  }

  // Query command and arguments
  if(args_info.QUERY_given){
    command=COM_QUERY;
    dbName=args_info.database_arg;
    inFile=args_info.features_arg;

    if(args_info.keyList_given){
      trackFileName=args_info.keyList_arg;
      // NOTE(review): the null test on `new` never fires; an unopenable
      // keyList is not detected here.
      if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in)))
        error("Could not open keyList file for reading",trackFileName);
    }

    if(args_info.times_given){
      timesFileName=args_info.times_arg;
      if(strlen(timesFileName)>0){
        if(!(timesFile = new ifstream(timesFileName,ios::in)))
          error("Could not open times file for reading", timesFileName);
        usingTimes=1;
      }
    }

    // query type
    if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0)
      queryType=O2_FLAG_TRACK_QUERY;
    else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0)
      queryType=O2_FLAG_POINT_QUERY;
    else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0)
      queryType=O2_FLAG_SEQUENCE_QUERY;
    else
      error("unsupported query type",args_info.QUERY_arg);

    // A fixed query point is only used when the search is not exhaustive.
    if(!args_info.exhaustive_flag){
      queryPoint = args_info.qpoint_arg;
      usingQueryPoint=1;
      if(queryPoint<0 || queryPoint >10000)
        error("queryPoint out of range: 0 <= queryPoint <= 10000");
    }

    pointNN=args_info.pointnn_arg;
    if(pointNN<1 || pointNN >1000)
      error("pointNN out of range: 1 <= pointNN <= 1000");

    trackNN=args_info.resultlength_arg;
    // The message now states the bound that is actually enforced (10000).
    if(trackNN<1 || trackNN >10000)
      error("resultlength out of range: 1 <= resultlength <= 10000");

    sequenceLength=args_info.sequencelength_arg;
    if(sequenceLength<1 || sequenceLength >1000)
      error("seqlen out of range: 1 <= seqlen <= 1000");

    sequenceHop=args_info.sequencehop_arg;
    if(sequenceHop<1 || sequenceHop >1000)
      error("seqhop out of range: 1 <= seqhop <= 1000");

    return 0;
  }
  return -1; // no command found
}

/* Make a new database

   The database consists of:

   header
   ---------------------------------------------------------------------------------
   | magic 4 bytes| numFiles 4 bytes | dim 4 bytes | length 4 bytes |flags 4 bytes |
   ---------------------------------------------------------------------------------
   

   keyTable : list of keys of tracks
   --------------------------------------------------------------------------
   | key 256 bytes                                                          |
   --------------------------------------------------------------------------
   O2_MAXFILES*02_FILENAMELENGTH

   trackTable : Maps implicit feature index to a feature vector matrix
   --------------------------------------------------------------------------
   | numVectors (4 bytes)                                                   |
   --------------------------------------------------------------------------
   O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(INT)

   featureTable
   --------------------------------------------------------------------------
   | v1 v2 v3 ... vd (double)                                               |
   --------------------------------------------------------------------------
   O2_MAXFILES * 02_MEANNUMFEATURES * DIM * sizeof(DOUBLE)

   timesTable
   --------------------------------------------------------------------------
   | timestamp (double)                                                     |
   --------------------------------------------------------------------------
   O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(DOUBLE)

   l2normTable
   --------------------------------------------------------------------------
   | nm (double)                                                            |
   --------------------------------------------------------------------------
   O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(DOUBLE)

*/

// Acquire an fcntl() record lock covering the whole of file descriptor fd.
//   exclusive - true requests a write lock (F_WRLCK), false a read lock
// The request uses F_SETLKW.  It is re-issued immediately when interrupted
// (EINTR) and after a one second pause on EAGAIN; any other failure is
// reported through error().
void audioDB::get_lock(int fd, bool exclusive) {
  struct flock request;

  request.l_type = exclusive ? F_WRLCK : F_RDLCK;
  request.l_whence = SEEK_SET;
  request.l_start = 0;
  request.l_len = 0; /* "the whole file" */

  for (;;) {
    const int rc = fcntl(fd, F_SETLKW, &request);
    if (rc == 0)
      return;                 // lock obtained

    if (errno == EINTR)
      continue;               // interrupted by a signal: ask again at once

    if (errno == EAGAIN) {
      sleep(1);               // back off briefly before the next attempt
      continue;
    }

    error("fcntl lock error", "", "fcntl");
    return;
  }
}

// Drop the fcntl() record lock held on the whole of file descriptor fd.
// A failing fcntl() call is reported through error().
void audioDB::release_lock(int fd) {
  struct flock request;

  request.l_type = F_UNLCK;
  request.l_whence = SEEK_SET;
  request.l_start = 0;
  request.l_len = 0;          // zero length: the entire file

  if (fcntl(fd, F_SETLKW, &request) != 0)
    error("fcntl unlock error", "", "fcntl");
}

// Create a new, empty database file called dbName.
//
// The file must not exist yet (O_EXCL).  It is locked exclusively, extended
// to O2_DEFAULTDBSIZE bytes by writing one byte at the final offset, mapped
// read/write, and a freshly initialised header is copied to its start.
// Any failing system call is reported through error().
void audioDB::create(const char* dbName){
  const mode_t permissions = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH;

  dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, permissions);
  if (dbfid < 0)
    error("Can't create database file", dbName, "open");
  get_lock(dbfid, 1);

  // Seek to the last byte of the intended file ...
  if (lseek (dbfid, O2_DEFAULTDBSIZE - 1, SEEK_SET) == -1)
    error("lseek error in db file", "", "lseek");

  // ... and write a single byte there so the file has its full length.
  if (write (dbfid, "", 1) != 1)
    error("write error", "", "write");

  if(verbosity)
    cerr << "header size:" << O2_HEADERSIZE << endl;

  // Map the complete file for reading and writing.
  db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
                    MAP_SHARED, dbfid, 0);
  if (db == (char*) MAP_FAILED)
    error("mmap error for creating database", "", "mmap");

  dbH = new dbTableHeaderT();
  assert(dbH);

  // Header of an empty database: no files, no data, dimension not yet known.
  dbH->magic=O2_MAGIC;
  dbH->numFiles=0;
  dbH->length=0;
  dbH->dim=0;
  dbH->flags=0; //O2_FLAG_L2NORM;

  memcpy (db, dbH, O2_HEADERSIZE);

  if(verbosity)
    cerr << COM_CREATE << " " << dbName << endl;
}


// Placeholder: the body is empty, so calling drop() currently does nothing.
void audioDB::drop(){
}

// initTables - memory map files passed as arguments
// Precondition: database has already been created
void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile=0){
  if ((dbfid = open (dbName, forWrite ? O_RDWR : O_RDONLY)) < 0)
    error("Can't open database file", dbName, "open");
  get_lock(dbfid, forWrite);

  // open the input file
  if (inFile && (infid = open (inFile, O_RDONLY)) < 0)
    error("can't open input file for reading", inFile, "open");

  // find size of input file
  if (inFile && fstat (infid,&statbuf) < 0)
    error("fstat error finding size of input", "", "fstat");
  
  // Get the database header info
  dbH = new dbTableHeaderT();
  assert(dbH);
  
  if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT))
    error("error reading db header");

  fileTableOffset = O2_HEADERSIZE;
  trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES;
  dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES;
  l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);
  timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);

  if(dbH->magic!=O2_MAGIC){
    cerr << "expected: " << O2_MAGIC << ", got:" << dbH->magic << endl;
    error("database file has incorrect header",dbName);
  }

  if(inFile)
    if(dbH->dim==0 && dbH->length==0) // empty database
      read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality
    else {
      unsigned test;
      read(infid,&test,sizeof(unsigned));
      if(dbH->dim!=test){      
	cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <<endl;
	error("feature dimensions do not match database table dimensions");
      }
    }
  
  // mmap the input file 
  if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0))
      == (caddr_t) -1)
    error("mmap error for input", "", "mmap");

  // mmap the database file
  if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0),
			 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
    error("mmap error for initting tables of database", "", "mmap");

  // Make some handy tables with correct types
  fileTable= (char*)(db+fileTableOffset);
  trackTable = (unsigned*)(db+trackTableOffset);
  dataBuf  = (double*)(db+dataoffset);
  l2normTable = (double*)(db+l2normTableOffset);
  timesTable = (double*)(db+timesTableOffset);

}

void audioDB::insert(const char* dbName, const char* inFile){

  initTables(dbName, 1, inFile);

  if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
    error("Must use timestamps with timestamped database","use --times");

  // Check that there is room for at least 1 more file
  if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int)))
    error("No more room in database","insert failed: reason database is full.");
  
  if(!key)
    key=inFile;
  // Linear scan of filenames check for pre-existing feature
  unsigned alreadyInserted=0;
  for(unsigned k=0; k<dbH->numFiles; k++)
    if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){
      alreadyInserted=1;
      break;
    }

  if(alreadyInserted){
    if(verbosity)
      cerr << "Warning: key already exists in database, ignoring: " <<inFile << endl;
    return;
  }
  
  // Make a track index table of features to file indexes
  unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
  if(!numVectors){
    if(verbosity)
      cerr << "Warning: ignoring zero-length feature vector file:" << key << endl;
    // CLEAN UP
    munmap(indata,statbuf.st_size);
    munmap(db,O2_DEFAULTDBSIZE);
    close(infid);
    return;
  }

  strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));

  unsigned insertoffset = dbH->length;// Store current state

  // Check times status and insert times from file
  unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
  double* timesdata=timesTable+timesoffset;
  assert(timesdata+numVectors<l2normTable);
  insertTimeStamps(numVectors, timesFile, timesdata);

  // Increment file count
  dbH->numFiles++;

  // Update Header information
  dbH->length+=(statbuf.st_size-sizeof(int));

  // Copy the header back to the database
  memcpy (db, dbH, sizeof(dbTableHeaderT));  

  // Update track to file index map
  //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned));  
  memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));  

  // Update the feature database
  memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int));
  
  // Norm the vectors on input if the database is already L2 normed
  if(dbH->flags & O2_FLAG_L2NORM)
    unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append

  // Report status
  status(dbName);
  if(verbosity)
    cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors " 
	 << (statbuf.st_size-sizeof(int)) << " bytes." << endl;

  // CLEAN UP
  munmap(indata,statbuf.st_size);
  close(infid);
}

void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){
  unsigned numtimes=0;
 if(usingTimes){
   if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles)
     dbH->flags=dbH->flags|O2_FLAG_TIMES;
   else if(!(dbH->flags&O2_FLAG_TIMES)){
     cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl;
     usingTimes=0;
   }
   
   if(!timesFile->is_open()){
     if(dbH->flags & O2_FLAG_TIMES){
       munmap(indata,statbuf.st_size);
       munmap(db,O2_DEFAULTDBSIZE);
       error("problem opening times file on timestamped database",timesFileName);
     }
     else{
       cerr << "Warning: problem opening times file. But non-timestamped database, so ignoring times file." << endl;
       usingTimes=0;
     }      
   }

    // Process time file
   if(usingTimes){
     do{
       *timesFile>>*timesdata++;
       if(timesFile->eof())
	 break;
       numtimes++;
     }while(!timesFile->eof() && numtimes<numVectors);
     if(!timesFile->eof()){
	double dummy;
	do{
	  *timesFile>>dummy;
	  if(timesFile->eof())
	    break;
	  numtimes++;
	}while(!timesFile->eof());
     }
     if(numtimes<numVectors || numtimes>numVectors+2){
       munmap(indata,statbuf.st_size);
       munmap(db,O2_DEFAULTDBSIZE);
       close(infid);
       cerr << "expected " << numVectors << " found " << numtimes << endl;
       error("Times file is incorrect length for features file",inFile);
     }
     if(verbosity>2)
       cerr << "numtimes: " << numtimes << endl;
   }
 }
}

void audioDB::batchinsert(const char* dbName, const char* inFile){

  if ((dbfid = open (dbName, O_RDWR)) < 0)
    error("Can't open database file", dbName, "open");
  get_lock(dbfid, 1);

  if(!key)
    key=inFile;
  ifstream *filesIn = 0;
  ifstream *keysIn = 0;
  ifstream* thisTimesFile = 0;

  if(!(filesIn = new ifstream(inFile)))
    error("Could not open batch in file", inFile);
  if(key && key!=inFile)
    if(!(keysIn = new ifstream(key)))
      error("Could not open batch key file",key);
  
  // Get the database header info
  dbH = new dbTableHeaderT();
  assert(dbH);
  
  if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT))
    error("error reading db header");

  if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
    error("Must use timestamps with timestamped database","use --times");

  fileTableOffset = O2_HEADERSIZE;
  trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES;
  dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES;
  l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);
  timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);

  if(dbH->magic!=O2_MAGIC){
    cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl;
    error("database file has incorrect header",dbName);
  }

  
  unsigned totalVectors=0;
  char *thisKey = new char[MAXSTR];
  char *thisFile = new char[MAXSTR];
  char *thisTimesFileName = new char[MAXSTR];
    
  do{
    filesIn->getline(thisFile,MAXSTR);
    if(key && key!=inFile)
      keysIn->getline(thisKey,MAXSTR);
    else
      thisKey = thisFile;
    if(usingTimes)
      timesFile->getline(thisTimesFileName,MAXSTR);	  
    
    if(filesIn->eof())
      break;

    // open the input file
    if (thisFile && (infid = open (thisFile, O_RDONLY)) < 0)
      error("can't open feature file for reading", thisFile, "open");
  
    // find size of input file
    if (thisFile && fstat (infid,&statbuf) < 0)
      error("fstat error finding size of input", "", "fstat");

    // mmap the database file
    if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
			   MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
      error("mmap error for batchinsert into database", "", "mmap");
    
    // Make some handy tables with correct types
    fileTable= (char*)(db+fileTableOffset);
    trackTable = (unsigned*)(db+trackTableOffset);
    dataBuf  = (double*)(db+dataoffset);
    l2normTable = (double*)(db+l2normTableOffset);
    timesTable = (double*)(db+timesTableOffset);

    // Check that there is room for at least 1 more file
    if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int))))
      error("No more room in database","insert failed: reason database is full.");
    
    if(thisFile)
      if(dbH->dim==0 && dbH->length==0) // empty database
	read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality
      else {
	unsigned test;
	read(infid,&test,sizeof(unsigned));
	if(dbH->dim!=test){      
	  cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <<endl;
	  error("feature dimensions do not match database table dimensions");
	}
      }
  
    // mmap the input file 
    if (thisFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0))
	== (caddr_t) -1)
      error("mmap error for input", "", "mmap");
  
  
    // Linear scan of filenames check for pre-existing feature
    unsigned alreadyInserted=0;
  
    for(unsigned k=0; k<dbH->numFiles; k++)
      if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){
	alreadyInserted=1;
	break;
      }
  
    if(alreadyInserted){
      if(verbosity)
	cerr << "Warning: key already exists in database:" << thisKey << endl;
    }
    else{
  
      // Make a track index table of features to file indexes
      unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
      if(!numVectors){
	if(verbosity)
	  cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl;
      }
      else{	
	if(usingTimes){
	  if(timesFile->eof())
	    error("not enough timestamp files in timesList");
	  thisTimesFile=new ifstream(thisTimesFileName,ios::in);
	  if(!thisTimesFile->is_open())
	    error("Cannot open timestamp file",thisTimesFileName);
	  unsigned insertoffset=dbH->length;
	  unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
	  double* timesdata=timesTable+timesoffset;
	  assert(timesdata+numVectors<l2normTable);
	  insertTimeStamps(numVectors,thisTimesFile,timesdata);
	  if(thisTimesFile)
	    delete thisTimesFile;
	}
	  
	strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));
  
	unsigned insertoffset = dbH->length;// Store current state

	// Increment file count
	dbH->numFiles++;  
  
	// Update Header information
	dbH->length+=(statbuf.st_size-sizeof(int));
	// Copy the header back to the database
	memcpy (db, dbH, sizeof(dbTableHeaderT));  
  
	// Update track to file index map
	//memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned));  
	memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));  
	
	// Update the feature database
	memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int));
	
	// Norm the vectors on input if the database is already L2 normed
	if(dbH->flags & O2_FLAG_L2NORM)
	  unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append
	
	totalVectors+=numVectors;
      }
    }
    // CLEAN UP
    munmap(indata,statbuf.st_size);
    close(infid);
    munmap(db,O2_DEFAULTDBSIZE);
  }while(!filesIn->eof());

  // mmap the database file
  if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
			 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
    error("mmap error for creating database", "", "mmap");
  
  if(verbosity)
    cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " 
	 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl;
  
  // Report status
  status(dbName);
  
  munmap(db,O2_DEFAULTDBSIZE);
}

// Client side of the status web service: call adb__status for dbName on the
// service at hostport.  A successful call prints the returned integer to
// stdout; a failed call prints the SOAP fault to stderr.
void audioDB::ws_status(const char*dbName, char* hostport){
  struct soap soap;
  int adbStatusResult;

  soap_init(&soap);

  const int rc = soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResult);
  if(rc!=SOAP_OK){
    soap_print_fault(&soap,stderr);
  }
  else{
    std::cout << "result = " << adbStatusResult << std::endl;
  }

  // Tear down the gSOAP runtime in the same order as before.
  soap_destroy(&soap);
  soap_end(&soap);
  soap_done(&soap);
}

// Client side of the query web service: send the current query settings to
// the service at hostport.  On success one line per result is printed to
// stdout as "key distance qpos spos"; on failure the SOAP fault is printed
// to stderr.
void audioDB::ws_query(const char*dbName, const char *trackKey, const char* hostport){
  struct soap soap;
  adb__queryResult adbQueryResult;

  soap_init(&soap);

  const int rc = soap_call_adb__query(&soap,hostport,NULL,
                                      (char*)dbName,(char*)trackKey,(char*)trackFileName,(char*)timesFileName,
                                      queryType, queryPoint, pointNN, trackNN, sequenceLength, adbQueryResult);
  if(rc!=SOAP_OK){
    soap_print_fault(&soap,stderr);
  }
  else{
    for(int row=0; row<adbQueryResult.__sizeRlist; row++){
      std::cout << adbQueryResult.Rlist[row] << " " << adbQueryResult.Dist[row];
      std::cout << " " << adbQueryResult.Qpos[row] << " " << adbQueryResult.Spos[row] << std::endl;
    }
  }

  soap_destroy(&soap);
  soap_end(&soap);
  soap_done(&soap);

}


// Print a summary of the database to stdout: file count, dimension, used and
// free space (as vectors when the dimension is known, and as bytes with a
// percentage), the header flags, and how many tracks are empty or shorter
// than sequenceLength.  The database is opened read-only first when no
// header has been loaded yet.
void audioDB::status(const char* dbName){
  if(!dbH)
    initTables(dbName, 0, 0);

  cout << "num files:" << dbH->numFiles << endl;
  cout << "data dim:" << dbH->dim <<endl;

  // Vector counts can only be derived once a dimension has been set.
  if(dbH->dim>0){
    cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl;
    cout << "vectors available:" << (timesTableOffset-(dataoffset+dbH->length))/(sizeof(double)*dbH->dim) << endl;
  }

  cout << "total bytes:" << dbH->length
       << " (" << (100.0*dbH->length)/(timesTableOffset-dataoffset) << "%)" << endl;
  cout << "bytes available:" << timesTableOffset-(dataoffset+dbH->length)
       << " (" << (100.0*(timesTableOffset-(dataoffset+dbH->length)))/(timesTableOffset-dataoffset) << "%)" << endl;
  cout << "flags:" << dbH->flags << endl;

  // shortTracks counts every track below sequenceLength, empty ones included;
  // emptyTracks is the subset of those with no vectors at all.
  unsigned shortTracks=0;
  unsigned emptyTracks=0;
  for(unsigned track=0; track<dbH->numFiles; track++){
    if(!(trackTable[track]<sequenceLength))
      continue;
    shortTracks++;
    if(trackTable[track]==0)
      emptyTracks++;
  }
  cout << "null count: " << emptyTracks << " small sequence count " << shortTracks-emptyTracks << endl;
}


// Print one "key vectorCount" line per track to stdout, followed by the
// status() summary.  The database is opened read-only first when no header
// has been loaded yet.
void audioDB::dump(const char* dbName){
  if(!dbH)
    initTables(dbName, 0, 0);

  unsigned track=0;
  while(track<dbH->numFiles){
    cout << fileTable+track*O2_FILETABLESIZE << " " << trackTable[track] << endl;
    track++;
  }

  status(dbName);
}

// Open the database for writing, pass any stored vectors to
// unitNormAndInsertL2() (non-append mode), then set O2_FLAG_L2NORM in the
// header and write the header back to the mapping.
void audioDB::l2norm(const char* dbName){
  initTables(dbName, true, 0);

  const bool hasData = dbH->length>0;
  if(hasData){
    unsigned vectorCount = dbH->length/(sizeof(double)*dbH->dim);
    unitNormAndInsertL2(dataBuf, dbH->dim, vectorCount, 0); // No append
  }

  // Update database flags
  dbH->flags |= O2_FLAG_L2NORM;
  memcpy (db, dbH, O2_HEADERSIZE);
}
  

  
// Dispatch a query to the engine selected by queryType.
// Sequence queries use the nearest-neighbour engine when radius is 0 and the
// radius engine otherwise. Any other queryType is reported through error().
void audioDB::query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
  if(queryType==O2_FLAG_POINT_QUERY){
    pointQuery(dbName, inFile, adbQueryResult);
    return;
  }

  if(queryType==O2_FLAG_SEQUENCE_QUERY){
    if(radius!=0)
      trackSequenceQueryRad(dbName, inFile, adbQueryResult);
    else
      trackSequenceQueryNN(dbName, inFile, adbQueryResult);
    return;
  }

  if(queryType==O2_FLAG_TRACK_QUERY){
    trackPointQuery(dbName, inFile, adbQueryResult);
    return;
  }

  error("unrecognized queryType in query()");
}

// Return the ordinal position of key in the file table.
//
// Slots are scanned in order and the first slot whose leading strlen(key)
// characters equal key wins. Note that this is a prefix comparison: a key
// that is a prefix of a stored entry matches it, and an empty key matches
// slot 0.
// If nothing matches, error() is called; should it return, the result is
// O2_ERR_KEYNOTFOUND.
unsigned audioDB::getKeyPos(char* key){
  unsigned pos=0;
  while(pos<dbH->numFiles){
    const char* entry = fileTable + pos*O2_FILETABLESIZE;
    if(!strncmp(entry, key, strlen(key)))
      return pos;
    ++pos;
  }
  error("Key not found",key);
  return O2_ERR_KEYNOTFOUND;
}

// Basic point query engine.
//
// Scores every query vector against every database vector with an inner
// product and keeps the pointNN largest scores, best first. Scores start at
// 0.0, so negative inner products are never reported. When both the query
// and the database carry timestamps, a pair is only considered if the two
// point durations differ by less than timesTol times the query duration.
//
//   dbName         - database name, passed to initTables()
//   inFile         - query feature file, passed to initTables()
//   adbQueryResult - if null, each match is printed to stdout as
//                    "<file table entry> <score> <query index> <position in
//                    track>"; otherwise the result arrays are allocated here
//                    and filled for the web service caller.
void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){

  initTables(dbName, 0, inFile);

  // The query file is an int header followed by dim-sized double vectors
  unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);

  double* query = (double*)(indata+sizeof(int));
  double* data = dataBuf;
  double* queryCopy = 0;

  if( dbH->flags & O2_FLAG_L2NORM ){
    // Normalise a private copy so the mapped query file is left untouched
    queryCopy = new double[numVectors*dbH->dim];
    qNorm = new double[numVectors];
    assert(queryCopy&&qNorm);
    memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
    unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
    query = queryCopy;
  }

  // Result queue, sorted by decreasing score; ~0 marks an unused slot
  assert(pointNN>0 && pointNN<=O2_MAXNN);
  double distances[pointNN];
  unsigned qIndexes[pointNN];
  unsigned sIndexes[pointNN];
  for(unsigned k=0; k<pointNN; k++){
    distances[k]=0.0;
    qIndexes[k]=~0;
    sIndexes[k]=~0;
  }

  unsigned j=numVectors;
  unsigned k,l,n;
  double thisDist;

  unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double));
  double meanQdur = 0;
  double* timesdata = 0;
  double* dbdurs = 0;

  if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
    cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
    usingTimes=0;
  }

  else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
    cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;

  else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
    timesdata = new double[numVectors];
    insertTimeStamps(numVectors, timesFile, timesdata);
    // Turn timestamps into per-point durations (difference to the next point)
    for(k=0; k<numVectors-1; k++){
      timesdata[k]=timesdata[k+1]-timesdata[k];
      meanQdur+=timesdata[k];
    }
    meanQdur/=k;
    // Durations of every database point.
    // NOTE(review): the last entry of dbdurs is never written but is read by
    // the duration test below - confirm intended handling of the final point.
    dbdurs = new double[totalVecs];
    for(k=0; k<totalVecs-1; k++)
      dbdurs[k]=timesTable[k+1]-timesTable[k];
    j--; // decrement vector counter by one
  }

  if(usingQueryPoint){
    if(queryPoint>numVectors-1)
      error("queryPoint > numVectors in query");
    else{
      if(verbosity>1)
        cerr << "query point: " << queryPoint << endl;
      cerr.flush();
      // Restrict the search to the single requested query vector
      query=query+queryPoint*dbH->dim;
      numVectors=queryPoint+1;
      j=1;
    }
  }

  gettimeofday(&tv1, NULL);
  while(j--){ // query
    data=dataBuf;
    k=totalVecs; // number of database vectors
    while(k--){  // database
      // Inner product of the current query vector with the next data vector
      thisDist=0;
      l=dbH->dim;
      double* q=query;
      while(l--)
        thisDist+=*q++**data++;
      if(!usingTimes ||
         (usingTimes
          && fabs(dbdurs[totalVecs-k-1]-timesdata[numVectors-j-1])<timesdata[numVectors-j-1]*timesTol)){
        // Walk the queue from the worst slot upwards to find the insert position
        n=pointNN;
        while(n--){
          if(thisDist>=distances[n]){
            if((n==0 || thisDist<=distances[n-1])){
              // Copy all values above up the queue
              for( l=pointNN-1 ; l > n ; l--){
                distances[l]=distances[l-1];
                qIndexes[l]=qIndexes[l-1];
                sIndexes[l]=sIndexes[l-1];
              }
              distances[n]=thisDist;
              qIndexes[n]=numVectors-j-1;
              sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1;
              break;
            }
          }
          else
            break;
        }
      }
    }
    // Move query pointer to next query point
    query+=dbH->dim;
  }

  gettimeofday(&tv2, NULL);
  if(verbosity>1)
    cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;

  if(adbQueryResult==0){
    // Output answer
    // Loop over nearest neighbours
    for(k=0; k < pointNN; k++){
      // Find the track that owns database vector sIndexes[k]; unused slots
      // (index ~0) match no track and are silently skipped.
      unsigned cumTrack=0;
      for(l=0 ; l<dbH->numFiles; l++){
        cumTrack+=trackTable[l];
        if(sIndexes[k]<cumTrack){
          cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " "
               << sIndexes[k]+trackTable[l]-cumTrack << endl;
          break;
        }
      }
    }
  }
  else{ // Process Web Services Query
    int listLen = pointNN;
    adbQueryResult->__sizeRlist=listLen;
    adbQueryResult->__sizeDist=listLen;
    adbQueryResult->__sizeQpos=listLen;
    adbQueryResult->__sizeSpos=listLen;
    adbQueryResult->Rlist= new char*[listLen];
    adbQueryResult->Dist = new double[listLen];
    adbQueryResult->Qpos = new int[listLen];
    adbQueryResult->Spos = new int[listLen];
    for(k=0; k<adbQueryResult->__sizeRlist; k++){
      adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
      // Defaults for a slot that no track owns (fewer than pointNN matches):
      // an empty name and the raw index, instead of an uninitialised string
      // and a read past the end of trackTable.
      adbQueryResult->Rlist[k][0]='\0';
      adbQueryResult->Spos[k]=sIndexes[k];
      adbQueryResult->Dist[k]=distances[k];
      adbQueryResult->Qpos[k]=qIndexes[k];
      unsigned cumTrack=0;
      for(l=0 ; l<dbH->numFiles; l++){
        cumTrack+=trackTable[l];
        if(sIndexes[k]<cumTrack){
          sprintf(adbQueryResult->Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE);
          // Position relative to the start of the owning track
          adbQueryResult->Spos[k]=sIndexes[k]+trackTable[l]-cumTrack;
          break;
        }
      }
    }
  }

  // Clean up: every buffer below was allocated with new[]
  if(queryCopy)
    delete[] queryCopy;
  if(qNorm)
    delete[] qNorm;
  if(timesdata)
    delete[] timesdata;
  if(dbdurs)
    delete[] dbdurs;
}

// trackPointQuery
//
// Return the trackNN closest tracks to the query track. Each track is scored
// by the average of its pointNN largest inner products with the query
// vectors; tracks are reported best (largest average) first.
//
// If trackFile is set, tracks are taken from it line by line (resolved with
// getKeyPos()) instead of iterating over the whole database.
//
//   dbName         - database name, passed to initTables()
//   inFile         - query feature file, passed to initTables()
//   adbQueryResult - if null, each result is printed to stdout as
//                    "<file table entry> <score> <query index> <position in
//                    track>"; otherwise the result arrays are allocated here
//                    and filled for the web service caller.
void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
  initTables(dbName, 0, inFile);

  // The query file is an int header followed by dim-sized double vectors
  unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);

  double* query = (double*)(indata+sizeof(int));
  double* data = dataBuf;
  double* queryCopy = 0;

  if( dbH->flags & O2_FLAG_L2NORM ){
    // Normalise a private copy so the mapped query file is left untouched
    queryCopy = new double[numVectors*dbH->dim];
    qNorm = new double[numVectors];
    assert(queryCopy&&qNorm);
    memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
    unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
    query = queryCopy;
  }

  assert(pointNN>0 && pointNN<=O2_MAXNN);
  assert(trackNN>0 && trackNN<=O2_MAXNN);

  // Track-level result queue (trackNN slots, decreasing score)
  double trackDistances[trackNN];
  unsigned trackIDs[trackNN];
  unsigned trackQIndexes[trackNN];
  unsigned trackSIndexes[trackNN];

  // Point-level queue for the track currently being scored (pointNN slots)
  double distances[pointNN];
  unsigned qIndexes[pointNN];
  unsigned sIndexes[pointNN];

  unsigned j=numVectors; // number of query points
  unsigned k,l,n, track, trackOffset=0, processedTracks=0;
  double thisDist;

  for(k=0; k<pointNN; k++){
    distances[k]=0.0;
    qIndexes[k]=~0;
    sIndexes[k]=~0;
  }

  for(k=0; k<trackNN; k++){
    trackDistances[k]=0.0;
    trackQIndexes[k]=~0;
    trackSIndexes[k]=~0;
    trackIDs[k]=~0;
  }

  double meanQdur = 0;
  double* timesdata = 0;
  double* meanDBdur = 0;

  if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
    cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
    usingTimes=0;
  }

  else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
    cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;

  else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
    timesdata = new double[numVectors];
    insertTimeStamps(numVectors, timesFile, timesdata);
    // Turn timestamps into per-point durations and average them
    for(k=0; k<numVectors-1; k++){
      timesdata[k]=timesdata[k+1]-timesdata[k];
      meanQdur+=timesdata[k];
    }
    meanQdur/=k;
    // Mean point duration per track.
    // NOTE(review): timesTable is indexed from 0 for every track rather than
    // from the track's own offset - confirm against the timesTable layout.
    meanDBdur = new double[dbH->numFiles];
    for(k=0; k<dbH->numFiles; k++){
      meanDBdur[k]=0.0;
      for(j=0; j<trackTable[k]-1 ; j++)
        meanDBdur[k]+=timesTable[j+1]-timesTable[j];
      meanDBdur[k]/=j;
    }
  }

  if(usingQueryPoint){
    if(queryPoint>numVectors-1)
      error("queryPoint > numVectors in query");
    else{
      if(verbosity>1)
        cerr << "query point: " << queryPoint << endl;
      cerr.flush();
      query=query+queryPoint*dbH->dim;
      numVectors=queryPoint+1;
    }
  }

  // build track offset table (offsets are in doubles, not vectors)
  unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
  unsigned cumTrack=0;
  for(k=0; k<dbH->numFiles;k++){
    trackOffsetTable[k]=cumTrack;
    cumTrack+=trackTable[k]*dbH->dim;
  }

  char nextKey[MAXSTR];

  gettimeofday(&tv1, NULL);

  for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
    // With a track list, the next line of the list selects the track
    if(trackFile){
      if(!trackFile->eof()){
        trackFile->getline(nextKey,MAXSTR);
        track=getKeyPos(nextKey);
      }
      else
        break;
    }
    trackOffset=trackOffsetTable[track];     // numDoubles offset
    if(verbosity>7)
      cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";
    cerr.flush();

    // Rewind the query pointer for this track
    if(dbH->flags & O2_FLAG_L2NORM)
      query=queryCopy;
    else
      query=(double*)(indata+sizeof(int));
    if(usingQueryPoint){
      query+=queryPoint*dbH->dim;
      j=1;
    }
    else
      j=numVectors;

    while(j--){
      k=trackTable[track];  // number of vectors in track
      data=dataBuf+trackOffset; // data for track
      while(k--){
        // Inner product of the current query vector with the next track vector
        thisDist=0;
        l=dbH->dim;
        double* q=query;
        while(l--)
          thisDist+=*q++**data++;
        if(!usingTimes ||
           (usingTimes
            && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
          n=pointNN;
          while(n--){
            if(thisDist>=distances[n]){
              if((n==0 || thisDist<=distances[n-1])){
                // Copy all values above up the queue
                for( l=pointNN-1 ; l > n ; l--){
                  distances[l]=distances[l-1];
                  qIndexes[l]=qIndexes[l-1];
                  sIndexes[l]=sIndexes[l-1];
                }
                distances[n]=thisDist;
                qIndexes[n]=numVectors-j-1;
                sIndexes[n]=trackTable[track]-k-1;
                break;
              }
            }
            else
              break;
          }
        }
      } // track
      // Move query pointer to next query point
      query+=dbH->dim;
    } // query

    // Take the average of this track's best pointNN scores
    thisDist=0;
    for(n=0; n<pointNN; n++)
      thisDist+=distances[n];
    thisDist/=pointNN;

    // Insert the track into the track queue
    n=trackNN;
    while(n--){
      if(thisDist>=trackDistances[n]){
        if((n==0 || thisDist<=trackDistances[n-1])){
          // Copy all values above up the queue. The track queue has trackNN
          // slots, so the shift must start at trackNN-1 (not pointNN-1).
          for( l=trackNN-1 ; l > n ; l--){
            trackDistances[l]=trackDistances[l-1];
            trackQIndexes[l]=trackQIndexes[l-1];
            trackSIndexes[l]=trackSIndexes[l-1];
            trackIDs[l]=trackIDs[l-1];
          }
          trackDistances[n]=thisDist;
          trackQIndexes[n]=qIndexes[0];
          trackSIndexes[n]=sIndexes[0];
          trackIDs[n]=track;
          break;
        }
      }
      else
        break;
    }

    // Reset the point queue for the next track
    for(k=0; k<pointNN; k++){
      distances[k]=0.0;
      qIndexes[k]=~0;
      sIndexes[k]=~0;
    }
  } // tracks
  gettimeofday(&tv2, NULL);

  if(verbosity>1)
    cerr << endl << "processed tracks :" << processedTracks
         << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;

  if(adbQueryResult==0){
    if(verbosity>1)
      cerr<<endl;
    // Output answer
    // Loop over nearest neighbours
    for(k=0; k < min(trackNN,processedTracks); k++)
      cout << fileTable+trackIDs[k]*O2_FILETABLESIZE
           << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << endl;
  }
  else{ // Process Web Services Query
    int listLen = min(trackNN, processedTracks);
    adbQueryResult->__sizeRlist=listLen;
    adbQueryResult->__sizeDist=listLen;
    adbQueryResult->__sizeQpos=listLen;
    adbQueryResult->__sizeSpos=listLen;
    adbQueryResult->Rlist= new char*[listLen];
    adbQueryResult->Dist = new double[listLen];
    adbQueryResult->Qpos = new int[listLen];
    adbQueryResult->Spos = new int[listLen];
    for(k=0; k<adbQueryResult->__sizeRlist; k++){
      adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
      adbQueryResult->Dist[k]=trackDistances[k];
      adbQueryResult->Qpos[k]=trackQIndexes[k];
      adbQueryResult->Spos[k]=trackSIndexes[k];
      sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
    }
  }


  // Clean up: every buffer below was allocated with new[]
  if(trackOffsetTable)
    delete[] trackOffsetTable;
  if(queryCopy)
    delete[] queryCopy;
  if(qNorm)
    delete[] qNorm;
  if(timesdata)
    delete[] timesdata;
  if(meanDBdur)
    delete[] meanDBdur;

}


// k nearest-neighbor (k-NN) search between query and target tracks
// efficient implementation based on matched filter
// assumes normed shingles
// outputs distances of retrieved shingles, max retreived = pointNN shingles per per track
void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
  
  initTables(dbName, 0, inFile);
  
  // For each input vector, find the closest pointNN matching output vectors and report
  // we use stdout in this stub version
  unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
  unsigned numTracks = dbH->numFiles;
  
  double* query = (double*)(indata+sizeof(int));
  double* data = dataBuf;
  double* queryCopy = 0;

  double qMeanL2;
  double* sMeanL2;

  unsigned USE_THRESH=0;
  double SILENCE_THRESH=0;
  double DIFF_THRESH=0;

  if(!(dbH->flags & O2_FLAG_L2NORM) )
    error("Database must be L2 normed for sequence query","use -l2norm");
  
  if(verbosity>1)
    cerr << "performing norms ... "; cerr.flush();
  unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);

  // Make a copy of the query
  queryCopy = new double[numVectors*dbH->dim];
  memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
  qNorm = new double[numVectors];
  sNorm = new double[dbVectors];
  sMeanL2=new double[dbH->numFiles];
  assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength);    
  unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
  query = queryCopy;

  // Make norm measurements relative to sequenceLength
  unsigned w = sequenceLength-1;
  unsigned i,j;
  double* ps;
  double tmp1,tmp2;

  // Copy the L2 norm values to core to avoid disk random access later on
  memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
  double* snPtr = sNorm;
  for(i=0; i<dbH->numFiles; i++){
    if(trackTable[i]>=sequenceLength){
      tmp1=*snPtr;
      j=1;
      w=sequenceLength-1;
      while(w--)
	*snPtr+=snPtr[j++];
      ps = snPtr+1;
      w=trackTable[i]-sequenceLength; // +1 - 1
      while(w--){
	tmp2=*ps;
	*ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
	tmp1=tmp2;
	ps++;
      }
      ps = snPtr;
      w=trackTable[i]-sequenceLength+1;
      while(w--){
	*ps=sqrt(*ps);
	ps++;
      }
    }
    snPtr+=trackTable[i];
  }
  
  double* pn = sMeanL2;
  w=dbH->numFiles;
  while(w--)
    *pn++=0.0;
  ps=sNorm;
  unsigned processedTracks=0;
  for(i=0; i<dbH->numFiles; i++){
    if(trackTable[i]>sequenceLength-1){
      w = trackTable[i]-sequenceLength;
      pn = sMeanL2+i;
      *pn=0;
      while(w--)
	if(*ps>0)
	  *pn+=*ps++;
      *pn/=trackTable[i]-sequenceLength;
      SILENCE_THRESH+=*pn;
      processedTracks++;
    }
    ps = sNorm + trackTable[i];
  }
  if(verbosity>1)
    cerr << "processedTracks: " << processedTracks << endl;

    
  SILENCE_THRESH/=processedTracks;
  USE_THRESH=1; // Turn thresholding on
  DIFF_THRESH=SILENCE_THRESH; //  mean shingle power
  SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE
  if(verbosity>4)
    cerr << "silence thresh: " << SILENCE_THRESH;
  w=sequenceLength-1;
  i=1;
  tmp1=*qNorm;
  while(w--)
    *qNorm+=qNorm[i++];
  ps = qNorm+1;
  w=numVectors-sequenceLength; // +1 -1
  while(w--){
    tmp2=*ps;
    *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
    tmp1=tmp2;
    ps++;
  }
  ps = qNorm;
  qMeanL2 = 0;
  w=numVectors-sequenceLength+1;
  while(w--){
    *ps=sqrt(*ps);
    qMeanL2+=*ps++;
  }
  qMeanL2 /= numVectors-sequenceLength+1;

  if(verbosity>1)
    cerr << "done." << endl;    
  
  
  if(verbosity>1)
    cerr << "matching tracks..." << endl;
  
  assert(pointNN>0 && pointNN<=O2_MAXNN);
  assert(trackNN>0 && trackNN<=O2_MAXNN);
  
  // Make temporary dynamic memory for results
  double trackDistances[trackNN];
  unsigned trackIDs[trackNN];
  unsigned trackQIndexes[trackNN];
  unsigned trackSIndexes[trackNN];
  
  double distances[pointNN];
  unsigned qIndexes[pointNN];
  unsigned sIndexes[pointNN];
  

  unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
  double thisDist;
  double oneOverWL=1.0/wL;
  
  for(k=0; k<pointNN; k++){
    distances[k]=1.0e6;
    qIndexes[k]=~0;
    sIndexes[k]=~0;    
  }
  
  for(k=0; k<trackNN; k++){
    trackDistances[k]=1.0e6;
    trackQIndexes[k]=~0;
    trackSIndexes[k]=~0;
    trackIDs[k]=~0;
  }

  // Timestamp and durations processing
  double meanQdur = 0;
  double* timesdata = 0;
  double* meanDBdur = 0;
  
  if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
    cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
    usingTimes=0;
  }
  
  else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
    cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
  
  else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
    timesdata = new double[numVectors];
    assert(timesdata);
    insertTimeStamps(numVectors, timesFile, timesdata);
    // Calculate durations of points
    for(k=0; k<numVectors-1; k++){
      timesdata[k]=timesdata[k+1]-timesdata[k];
      meanQdur+=timesdata[k];
    }
    meanQdur/=k;
    if(verbosity>1)
      cerr << "mean query file duration: " << meanQdur << endl;
    meanDBdur = new double[dbH->numFiles];
    assert(meanDBdur);
    for(k=0; k<dbH->numFiles; k++){
      meanDBdur[k]=0.0;
      for(j=0; j<trackTable[k]-1 ; j++)
	meanDBdur[k]+=timesTable[j+1]-timesTable[j];
      meanDBdur[k]/=j;
    }
  }

  if(usingQueryPoint)
    if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
      error("queryPoint > numVectors-wL+1 in query");
    else{
      if(verbosity>1)
	cerr << "query point: " << queryPoint << endl; cerr.flush();
      query=query+queryPoint*dbH->dim;
      qNorm=qNorm+queryPoint;
      numVectors=wL;
    }
  
  double ** D = 0;    // Differences query and target 
  double ** DD = 0;   // Matched filter distance

  D = new double*[numVectors];
  assert(D);
  DD = new double*[numVectors];
  assert(DD);

  gettimeofday(&tv1, NULL); 
  processedTracks=0;
  unsigned successfulTracks=0;

  double* qp;
  double* sp;
  double* dp;
  double diffL2;

  // build track offset table
  unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
  unsigned cumTrack=0;
  unsigned trackIndexOffset;
  for(k=0; k<dbH->numFiles;k++){
    trackOffsetTable[k]=cumTrack;
    cumTrack+=trackTable[k]*dbH->dim;
  }

  char nextKey [MAXSTR];

  // chi^2 statistics
  double sampleCount = 0;
  double sampleSum = 0;
  double logSampleSum = 0;
  double minSample = 1e9;
  double maxSample = 0;

  // Track loop 
  for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){

    // get trackID from file if using a control file
    if(trackFile){
      if(!trackFile->eof()){
	trackFile->getline(nextKey,MAXSTR);
	track=getKeyPos(nextKey);
      }
      else
	break;
    }

    trackOffset=trackOffsetTable[track];     // numDoubles offset
    trackIndexOffset=trackOffset/dbH->dim; // numVectors offset

    if(sequenceLength<trackTable[track]){  // test for short sequences
      
      if(verbosity>7)
	cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();
		
      // Sum products matrix
      for(j=0; j<numVectors;j++){
	D[j]=new double[trackTable[track]]; 
	assert(D[j]);

      }

      // Matched filter matrix
      for(j=0; j<numVectors;j++){
	DD[j]=new double[trackTable[track]];
	assert(DD[j]);
      }

      double tmp;
      // Dot product
      for(j=0; j<numVectors; j++)
	for(k=0; k<trackTable[track]; k++){
	  qp=query+j*dbH->dim;
	  sp=dataBuf+trackOffset+k*dbH->dim;
	  DD[j][k]=0.0; // Initialize matched filter array
	  dp=&D[j][k];  // point to correlation cell j,k
	  *dp=0.0;      // initialize correlation cell
	  l=dbH->dim;         // size of vectors
	  while(l--)
	    *dp+=*qp++**sp++;
	}
  
      // Matched Filter
      // HOP SIZE == 1
      double* spd;
      if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
	for(w=0; w<wL; w++)
	  for(j=0; j<numVectors-w; j++){ 
	    sp=DD[j];
	    spd=D[j+w]+w;
	    k=trackTable[track]-w;
	    while(k--)
	      *sp+++=*spd++;
	  }
      }

      else{ // HOP_SIZE != 1
	for(w=0; w<wL; w++)
	  for(j=0; j<numVectors-w; j+=HOP_SIZE){
	    sp=DD[j];
	    spd=D[j+w]+w;
	    for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
	      *sp+=*spd;
	      sp+=HOP_SIZE;
	      spd+=HOP_SIZE;
	    }
	  }
      }
      
      if(verbosity>3 && usingTimes){
	cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl;
	cerr.flush();
      }

      if(!usingTimes || 
	 (usingTimes 
	  && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){

	if(verbosity>3 && usingTimes){
	  cerr << "within duration tolerance." << endl;
	  cerr.flush();
	}

	// Search for minimum distance by shingles (concatenated vectors)
	for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
	  for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
	    thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
	    if(verbosity>10)
	      cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl;
	    // Gather chi^2 statistics
	    if(thisDist<minSample)
	      minSample=thisDist;
	    else if(thisDist>maxSample)
	      maxSample=thisDist;
	    if(thisDist>1e-9){
	      sampleCount++;
	      sampleSum+=thisDist;
	      logSampleSum+=log(thisDist);
	    }

	    // diffL2 = fabs(qNorm[j] - sNorm[trackIndexOffset+k]);
	    // Power test
	    if(!USE_THRESH || 
	       // Threshold on mean L2 of Q and S sequences
	       (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH && 
		// Are both query and target windows above mean energy?
		(qNorm[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // &&  diffL2 < DIFF_THRESH )))
	      thisDist=thisDist; // Computed above
	    else
	      thisDist=1000000.0;

	    // k-NN match algorithm
	    m=pointNN;
	    while(m--){
	      if(thisDist<=distances[m])
		if(m==0 || thisDist>=distances[m-1]){
		// Shuffle distances up the list
		for(l=pointNN-1; l>m; l--){
		  distances[l]=distances[l-1];
		  qIndexes[l]=qIndexes[l-1];
		  sIndexes[l]=sIndexes[l-1];
		}
		distances[m]=thisDist;
		if(usingQueryPoint)
		  qIndexes[m]=queryPoint;
		else
		  qIndexes[m]=j;
		sIndexes[m]=k;
		break;
		}
	    }
	  }
	// Calculate the mean of the N-Best matches
	thisDist=0.0;
	for(m=0; m<pointNN; m++) {
          if (distances[m] == 1000000.0) break;
	  thisDist+=distances[m];
        }
	thisDist/=m;
	
	// Let's see the distances then...
	if(verbosity>3)
	  cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl;


	// All the track stuff goes here
	n=trackNN;
	while(n--){
	  if(thisDist<=trackDistances[n]){
	    if((n==0 || thisDist>=trackDistances[n-1])){
	      // Copy all values above up the queue
	      for( l=trackNN-1 ; l > n ; l--){
		trackDistances[l]=trackDistances[l-1];
		trackQIndexes[l]=trackQIndexes[l-1];
		trackSIndexes[l]=trackSIndexes[l-1];
		trackIDs[l]=trackIDs[l-1];
	      }
	      trackDistances[n]=thisDist;
	      trackQIndexes[n]=qIndexes[0];
	      trackSIndexes[n]=sIndexes[0];
	      successfulTracks++;
	      trackIDs[n]=track;
	      break;
	    }
	  }
	  else
	    break;
	}
      } // Duration match
            
      // Clean up current track
      if(D!=NULL){
	for(j=0; j<numVectors; j++)
	  delete[] D[j];
      }

      if(DD!=NULL){
	for(j=0; j<numVectors; j++)
	  delete[] DD[j];
      }
    }
    // per-track reset array values
    for(unsigned k=0; k<pointNN; k++){
      distances[k]=1.0e6;
      qIndexes[k]=~0;
      sIndexes[k]=~0;    
    }
  }

  gettimeofday(&tv2,NULL);
  if(verbosity>1){
    cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" 
	 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
    cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum 
	 << " minSample: " << minSample << " maxSample: " << maxSample << endl;
  }  
  if(adbQueryResult==0){
    if(verbosity>1)
      cerr<<endl;
    // Output answer
    // Loop over nearest neighbours
    for(k=0; k < min(trackNN,successfulTracks); k++)
      cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << " " 
	   << trackQIndexes[k] << " " << trackSIndexes[k] << endl;
  }
  else{ // Process Web Services Query
    int listLen = min(trackNN, processedTracks);
    adbQueryResult->__sizeRlist=listLen;
    adbQueryResult->__sizeDist=listLen;
    adbQueryResult->__sizeQpos=listLen;
    adbQueryResult->__sizeSpos=listLen;
    adbQueryResult->Rlist= new char*[listLen];
    adbQueryResult->Dist = new double[listLen];
    adbQueryResult->Qpos = new int[listLen];
    adbQueryResult->Spos = new int[listLen];
    for(k=0; k<adbQueryResult->__sizeRlist; k++){
      adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
      adbQueryResult->Dist[k]=trackDistances[k];
      adbQueryResult->Qpos[k]=trackQIndexes[k];
      adbQueryResult->Spos[k]=trackSIndexes[k];
      sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
    }
  }


  // Clean up
  if(trackOffsetTable)
    delete[] trackOffsetTable;
  if(queryCopy)
    delete[] queryCopy;
  //if(qNorm)
  //delete qNorm;
  if(D)
    delete[] D;
  if(DD)
    delete[] DD;
  if(timesdata)
    delete[] timesdata;
  if(meanDBdur)
    delete[] meanDBdur;


}

// Radius search between query and target tracks
// efficient implementation based on matched filter
// assumes normed shingles
// outputs count of retrieved shingles, max retrieved = one shingle per query shingle per track
void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
  
  initTables(dbName, 0, inFile);
  
  // For each input vector, find the closest pointNN matching output vectors and report
  // we use stdout in this stub version
  unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
  unsigned numTracks = dbH->numFiles;
  
  double* query = (double*)(indata+sizeof(int));
  double* data = dataBuf;
  double* queryCopy = 0;

  double qMeanL2;
  double* sMeanL2;

  unsigned USE_THRESH=0;
  double SILENCE_THRESH=0;
  double DIFF_THRESH=0;

  if(!(dbH->flags & O2_FLAG_L2NORM) )
    error("Database must be L2 normed for sequence query","use -l2norm");
  
  if(verbosity>1)
    cerr << "performing norms ... "; cerr.flush();
  unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);

  // Make a copy of the query
  queryCopy = new double[numVectors*dbH->dim];
  memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
  qNorm = new double[numVectors];
  sNorm = new double[dbVectors];
  sMeanL2=new double[dbH->numFiles];
  assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength);    
  unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
  query = queryCopy;

  // Make norm measurements relative to sequenceLength
  unsigned w = sequenceLength-1;
  unsigned i,j;
  double* ps;
  double tmp1,tmp2;

  // Copy the L2 norm values to core to avoid disk random access later on
  memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
  double* snPtr = sNorm;
  for(i=0; i<dbH->numFiles; i++){
    if(trackTable[i]>=sequenceLength){
      tmp1=*snPtr;
      j=1;
      w=sequenceLength-1;
      while(w--)
	*snPtr+=snPtr[j++];
      ps = snPtr+1;
      w=trackTable[i]-sequenceLength; // +1 - 1
      while(w--){
	tmp2=*ps;
	*ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
	tmp1=tmp2;
	ps++;
      }
      ps = snPtr;
      w=trackTable[i]-sequenceLength+1;
      while(w--){
	*ps=sqrt(*ps);
	ps++;
      }
    }
    snPtr+=trackTable[i];
  }
  
  double* pn = sMeanL2;
  w=dbH->numFiles;
  while(w--)
    *pn++=0.0;
  ps=sNorm;
  unsigned processedTracks=0;
  for(i=0; i<dbH->numFiles; i++){
    if(trackTable[i]>sequenceLength-1){
      w = trackTable[i]-sequenceLength;
      pn = sMeanL2+i;
      *pn=0;
      while(w--)
	if(*ps>0)
	  *pn+=*ps++;
      *pn/=trackTable[i]-sequenceLength;
      SILENCE_THRESH+=*pn;
      processedTracks++;
    }
    ps = sNorm + trackTable[i];
  }
  if(verbosity>1)
    cerr << "processedTracks: " << processedTracks << endl;

    
  SILENCE_THRESH/=processedTracks;
  USE_THRESH=1; // Turn thresholding on
  DIFF_THRESH=SILENCE_THRESH; //  mean shingle power
  SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE
  if(verbosity>4)
    cerr << "silence thresh: " << SILENCE_THRESH;
  w=sequenceLength-1;
  i=1;
  tmp1=*qNorm;
  while(w--)
    *qNorm+=qNorm[i++];
  ps = qNorm+1;
  w=numVectors-sequenceLength; // +1 -1
  while(w--){
    tmp2=*ps;
    *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
    tmp1=tmp2;
    ps++;
  }
  ps = qNorm;
  qMeanL2 = 0;
  w=numVectors-sequenceLength+1;
  while(w--){
    *ps=sqrt(*ps);
    qMeanL2+=*ps++;
  }
  qMeanL2 /= numVectors-sequenceLength+1;

  if(verbosity>1)
    cerr << "done." << endl;    
  
  
  if(verbosity>1)
    cerr << "matching tracks..." << endl;
  
  assert(pointNN>0 && pointNN<=O2_MAXNN);
  assert(trackNN>0 && trackNN<=O2_MAXNN);
  
  // Make temporary dynamic memory for results
  double trackDistances[trackNN];
  unsigned trackIDs[trackNN];
  unsigned trackQIndexes[trackNN];
  unsigned trackSIndexes[trackNN];
  
  double distances[pointNN];
  unsigned qIndexes[pointNN];
  unsigned sIndexes[pointNN];
  

  unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
  double thisDist;
  double oneOverWL=1.0/wL;
  
  for(k=0; k<pointNN; k++){
    distances[k]=0.0;
    qIndexes[k]=~0;
    sIndexes[k]=~0;    
  }
  
  for(k=0; k<trackNN; k++){
    trackDistances[k]=0.0;
    trackQIndexes[k]=~0;
    trackSIndexes[k]=~0;
    trackIDs[k]=~0;
  }

  // Timestamp and durations processing
  double meanQdur = 0;
  double* timesdata = 0;
  double* meanDBdur = 0;
  
  if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
    cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
    usingTimes=0;
  }
  
  else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
    cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;
  
  else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
    timesdata = new double[numVectors];
    assert(timesdata);
    insertTimeStamps(numVectors, timesFile, timesdata);
    // Calculate durations of points
    for(k=0; k<numVectors-1; k++){
      timesdata[k]=timesdata[k+1]-timesdata[k];
      meanQdur+=timesdata[k];
    }
    meanQdur/=k;
    if(verbosity>1)
      cerr << "mean query file duration: " << meanQdur << endl;
    meanDBdur = new double[dbH->numFiles];
    assert(meanDBdur);
    for(k=0; k<dbH->numFiles; k++){
      meanDBdur[k]=0.0;
      for(j=0; j<trackTable[k]-1 ; j++)
	meanDBdur[k]+=timesTable[j+1]-timesTable[j];
      meanDBdur[k]/=j;
    }
  }

  if(usingQueryPoint)
    if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
      error("queryPoint > numVectors-wL+1 in query");
    else{
      if(verbosity>1)
	cerr << "query point: " << queryPoint << endl; cerr.flush();
      query=query+queryPoint*dbH->dim;
      qNorm=qNorm+queryPoint;
      numVectors=wL;
    }
  
  double ** D = 0;    // Differences query and target 
  double ** DD = 0;   // Matched filter distance

  D = new double*[numVectors];
  assert(D);
  DD = new double*[numVectors];
  assert(DD);

  gettimeofday(&tv1, NULL); 
  processedTracks=0;
  unsigned successfulTracks=0;

  double* qp;
  double* sp;
  double* dp;
  double diffL2;

  // build track offset table
  unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
  unsigned cumTrack=0;
  unsigned trackIndexOffset;
  for(k=0; k<dbH->numFiles;k++){
    trackOffsetTable[k]=cumTrack;
    cumTrack+=trackTable[k]*dbH->dim;
  }

  char nextKey [MAXSTR];

  // chi^2 statistics
  double sampleCount = 0;
  double sampleSum = 0;
  double logSampleSum = 0;
  double minSample = 1e9;
  double maxSample = 0;

  // Track loop 
  for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){

    // get trackID from file if using a control file
    if(trackFile){
      if(!trackFile->eof()){
	trackFile->getline(nextKey,MAXSTR);
	track=getKeyPos(nextKey);
      }
      else
	break;
    }

    trackOffset=trackOffsetTable[track];     // numDoubles offset
    trackIndexOffset=trackOffset/dbH->dim; // numVectors offset

    if(sequenceLength<trackTable[track]){  // test for short sequences
      
      if(verbosity>7)
	cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();
		
      // Sum products matrix
      for(j=0; j<numVectors;j++){
	D[j]=new double[trackTable[track]]; 
	assert(D[j]);

      }

      // Matched filter matrix
      for(j=0; j<numVectors;j++){
	DD[j]=new double[trackTable[track]];
	assert(DD[j]);
      }

      double tmp;
      // Dot product
      for(j=0; j<numVectors; j++)
	for(k=0; k<trackTable[track]; k++){
	  qp=query+j*dbH->dim;
	  sp=dataBuf+trackOffset+k*dbH->dim;
	  DD[j][k]=0.0; // Initialize matched filter array
	  dp=&D[j][k];  // point to correlation cell j,k
	  *dp=0.0;      // initialize correlation cell
	  l=dbH->dim;         // size of vectors
	  while(l--)
	    *dp+=*qp++**sp++;
	}
  
      // Matched Filter
      // HOP SIZE == 1
      double* spd;
      if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
	for(w=0; w<wL; w++)
	  for(j=0; j<numVectors-w; j++){ 
	    sp=DD[j];
	    spd=D[j+w]+w;
	    k=trackTable[track]-w;
	    while(k--)
	      *sp+++=*spd++;
	  }
      }

      else{ // HOP_SIZE != 1
	for(w=0; w<wL; w++)
	  for(j=0; j<numVectors-w; j+=HOP_SIZE){
	    sp=DD[j];
	    spd=D[j+w]+w;
	    for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
	      *sp+=*spd;
	      sp+=HOP_SIZE;
	      spd+=HOP_SIZE;
	    }
	  }
      }
      
      if(verbosity>3 && usingTimes){
	cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl;
	cerr.flush();
      }

      if(!usingTimes || 
	 (usingTimes 
	  && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){

	if(verbosity>3 && usingTimes){
	  cerr << "within duration tolerance." << endl;
	  cerr.flush();
	}

	// Search for minimum distance by shingles (concatenated vectors)
	for(j=0;j<numVectors-wL;j+=HOP_SIZE)
	  for(k=0;k<trackTable[track]-wL;k+=HOP_SIZE){
	    thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
	    if(verbosity>10)
	      cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl;
	    // Gather chi^2 statistics
	    if(thisDist<minSample)
	      minSample=thisDist;
	    else if(thisDist>maxSample)
	      maxSample=thisDist;
	    if(thisDist>1e-9){
	      sampleCount++;
	      sampleSum+=thisDist;
	      logSampleSum+=log(thisDist);
	    }

	    // diffL2 = fabs(qNorm[j] - sNorm[trackIndexOffset+k]);
	    // Power test
	    if(!USE_THRESH || 
	       // Threshold on mean L2 of Q and S sequences
	       (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH && 
		// Are both query and target windows above mean energy?
		(qNorm[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // &&  diffL2 < DIFF_THRESH )))
	      thisDist=thisDist; // Computed above
	    else
	      thisDist=1000000.0;
	    if(thisDist>=0 && thisDist<=radius){
	      distances[0]++; // increment count
	      break; // only need one track point per query point
	    }
	  }
	// How many points were below threshold ?
	thisDist=distances[0];
	
	// Let's see the distances then...
	if(verbosity>3)
	  cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl;

	// All the track stuff goes here
	n=trackNN;
	while(n--){
	  if(thisDist>trackDistances[n]){
	    if((n==0 || thisDist<=trackDistances[n-1])){
	      // Copy all values above up the queue
	      for( l=trackNN-1 ; l > n ; l--){
		trackDistances[l]=trackDistances[l-1];
		trackQIndexes[l]=trackQIndexes[l-1];
		trackSIndexes[l]=trackSIndexes[l-1];
		trackIDs[l]=trackIDs[l-1];
	      }
	      trackDistances[n]=thisDist;
	      trackQIndexes[n]=qIndexes[0];
	      trackSIndexes[n]=sIndexes[0];
	      successfulTracks++;
	      trackIDs[n]=track;
	      break;
	    }
	  }
	  else
	    break;
	}
      } // Duration match
            
      // Clean up current track
      if(D!=NULL){
	for(j=0; j<numVectors; j++)
	  delete[] D[j];
      }

      if(DD!=NULL){
	for(j=0; j<numVectors; j++)
	  delete[] DD[j];
      }
    }
    // per-track reset array values
    for(unsigned k=0; k<pointNN; k++){
      distances[k]=0.0;
      qIndexes[k]=~0;
      sIndexes[k]=~0;    
    }
  }

  gettimeofday(&tv2,NULL);
  if(verbosity>1){
    cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" 
	 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
    cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum 
	 << " minSample: " << minSample << " maxSample: " << maxSample << endl;
  }
  
  if(adbQueryResult==0){
    if(verbosity>1)
      cerr<<endl;
    // Output answer
    // Loop over nearest neighbours
    for(k=0; k < min(trackNN,successfulTracks); k++)
      cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << endl;
  }
  else{ // Process Web Services Query
    int listLen = min(trackNN, processedTracks);
    adbQueryResult->__sizeRlist=listLen;
    adbQueryResult->__sizeDist=listLen;
    adbQueryResult->__sizeQpos=listLen;
    adbQueryResult->__sizeSpos=listLen;
    adbQueryResult->Rlist= new char*[listLen];
    adbQueryResult->Dist = new double[listLen];
    adbQueryResult->Qpos = new int[listLen];
    adbQueryResult->Spos = new int[listLen];
    for(k=0; k<adbQueryResult->__sizeRlist; k++){
      adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
      adbQueryResult->Dist[k]=trackDistances[k];
      adbQueryResult->Qpos[k]=trackQIndexes[k];
      adbQueryResult->Spos[k]=trackSIndexes[k];
      sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
    }
  }


  // Clean up
  if(trackOffsetTable)
    delete[] trackOffsetTable;
  if(queryCopy)
    delete[] queryCopy;
  //if(qNorm)
  //delete qNorm;
  if(D)
    delete[] D;
  if(DD)
    delete[] DD;
  if(timesdata)
    delete[] timesdata;
  if(meanDBdur)
    delete[] meanDBdur;


}

// Rescale the n*dim values in X in place, using the smallest and largest value
// found in X itself as the range (see the five-argument overload for the mapping).
//
// X   : block of n vectors of dim doubles each, stored contiguously
// dim : number of components per vector
// n   : number of vectors
void audioDB::normalize(double* X, int dim, int n){
  unsigned c = n*dim;
  double minval,maxval,v,*p;

  // Nothing to scan, hence nothing to rescale
  if(c==0)
    return;

  // Seed both extrema from the first element. They used to be read
  // uninitialised, which made the detected range (and so the result) undefined.
  minval=maxval=*X;

  p=X+1;
  c--;
  while(c--){
    v=*p++;
    if(v<minval)
      minval=v;
    else if(v>maxval)
      maxval=v;
  }

  normalize(X, dim, n, minval, maxval);

}

// Rescale the n*dim values in X in place with the affine map
//   x -> (x - minval) / (maxval - minval)
// so that minval maps to 0 and maxval maps to 1.
// When maxval equals minval the data is left untouched.
void audioDB::normalize(double* X, int dim, int n, double minval, double maxval){
  // A zero-width range cannot be rescaled
  if(maxval==minval)
    return;

  // Multiply by the reciprocal once instead of dividing per element
  const double scale = 1.0/(maxval-minval);
  const unsigned total = n*dim;

  for(unsigned idx=0; idx<total; idx++)
    X[idx]=(X[idx]-minval)*scale;
}

// Compute the squared L2 norm (sum of squared components) of each of the n
// dim-wide vectors in X and, when qNorm is non-null, store it in qNorm[0..n-1].
// Despite the name, X itself is not modified: no square root is taken and the
// vectors are not rescaled.
void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
  if(verbosity>2)
    cerr << "norming " << n << " vectors...";
  cerr.flush(); // flushed whatever the verbosity level

  for(unsigned vec=0; vec<n; vec++, X+=dim){
    double sumSquares=0.0;
    for(unsigned c=0; c<dim; c++)
      sumSquares+=X[c]*X[c];
    if(qNorm)
      qNorm[vec]=sumSquares;
  }

  if(verbosity>2)
    cerr << "done..." << endl;
}

// Compute the squared L2 norm (sum of squared components) of each of the n
// dim-wide vectors in X and copy the results into l2normTable.
//
// append == 0 : results are written from the start of l2normTable; error() is
//               called first if the database already carries O2_FLAG_L2NORM.
// append != 0 : results are written after the entries for the vectors already
//               in the database (dbH->length bytes of dim-wide doubles).
//
// X itself is not modified: the vectors are not rescaled.
void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){
  assert(l2normTable);

  if( !append && (dbH->flags & O2_FLAG_L2NORM) )
    error("Database is already L2 normed", "automatic norm on insert is enabled");

  if(verbosity>2)
    cerr << "norming " << n << " vectors...";
  cerr.flush(); // flushed whatever the verbosity level

  // Scratch buffer so the table is updated with a single copy at the end
  double* sumSquares = new double[n];
  assert(sumSquares);
  assert(X);

  const double* vec = X;
  for(unsigned v=0; v<n; v++, vec+=dim){
    sumSquares[v]=0.0;
    for(unsigned c=0; c<dim; c++)
      sumSquares[v]+=vec[c]*vec[c];
  }

  // Index of the first table slot to fill, counted in vectors
  unsigned offset=0;
  if(append)
    offset=dbH->length/(dbH->dim*sizeof(double)); // number of vectors

  memcpy(l2normTable+offset, sumSquares, n*sizeof(double));
  delete[] sumSquares;

  if(verbosity>2)
    cerr << "done..." << endl;
}


// Start an audioDB server on the host
//
// Binds a gSOAP master socket on `port`, then accepts and serves requests one
// at a time until soap_accept() reports a failure. Progress and faults are
// written to stderr.
void audioDB::startServer(){
  struct soap soap;
  soap_init(&soap);

  const int master = soap_bind(&soap, NULL, port, 100); // master socket
  if (master < 0) {
    soap_print_fault(&soap, stderr);
  }
  else {
    fprintf(stderr, "Socket connection successful: master socket = %d\n", master);

    int connection = 1; // running count of accepted connections, for the log only
    while (true) {
      const int slave = soap_accept(&soap); // slave socket
      if (slave < 0) {
        soap_print_fault(&soap, stderr);
        break;
      }
      fprintf(stderr, "%d: accepted connection from IP=%d.%d.%d.%d socket=%d\n", connection,
              (soap.ip >> 24)&0xFF, (soap.ip >> 16)&0xFF, (soap.ip >> 8)&0xFF, soap.ip&0xFF, slave);
      if (soap_serve(&soap) != SOAP_OK) // process RPC request
        soap_print_fault(&soap, stderr); // print error
      fprintf(stderr, "request served\n");
      soap_destroy(&soap); // clean up class instances
      soap_end(&soap); // clean up everything and close socket
      connection++;
    }
  }

  soap_done(&soap); // close master socket and detach environment
}


// web services

// SERVER SIDE
// Web-service entry point for the status command.
// Runs audioDB as if invoked with: audioDB <COM_STATUS> -d <dbName>,
// then sets adbCreateResult to 100 and returns SOAP_OK.
int adb__status(struct soap* soap, xsd__string dbName, xsd__int &adbCreateResult){
  char* const argv[]={"audioDB",COM_STATUS,"-d",dbName};
  // Derive the count from the array so the two cannot drift apart (4 entries)
  const unsigned argc = sizeof(argv)/sizeof(argv[0]);

  // The command is executed by constructing a temporary audioDB object
  audioDB(argc,argv);

  adbCreateResult=100;
  return SOAP_OK;
}

// Literal translation of command line to web service
//
// Builds the argument vector of the equivalent command-line query and runs it
// by constructing a temporary audioDB object, which receives &adbQueryResult.
// qType selects "point", "sequence" or "track"; any other value yields an empty
// query-type string. A value of 0 for pointNN, trackNN or seqLen is replaced by
// the defaults 10, 10 and 16. Null keyList / timesFileName are passed as "".
// Always returns SOAP_OK.
int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int trackNN, xsd__int seqLen, adb__queryResult &adbQueryResult){
  // Spell out the numeric query type the way the command line expects it
  const char* typeName = "";
  if(qType == O2_FLAG_POINT_QUERY)
    typeName = "point";
  else if(qType == O2_FLAG_SEQUENCE_QUERY)
    typeName = "sequence";
  else if(qType == O2_FLAG_TRACK_QUERY)
    typeName = "track";

  // Zero-filled buffer, so the copy below (which omits the terminator) stays a valid C string
  char queryType[256] = {'\0'};
  strncpy(queryType, typeName, strlen(typeName));

  // Zero means "not specified": fall back to the defaults
  if(!pointNN)
    pointNN=10;
  if(!trackNN)
    trackNN=10;
  if(!seqLen)
    seqLen=16;

  // Numeric options travel as text, exactly as on a real command line
  char qPosStr[256];
  char pointNNStr[256];
  char trackNNStr[256];
  char seqLenStr[256];
  sprintf(qPosStr, "%d", qPos);
  sprintf(pointNNStr,"%d",pointNN);
  sprintf(trackNNStr,"%d",trackNN);
  sprintf(seqLenStr,"%d",seqLen);

  const char* emptyArg = "";
  const char* argv[] ={
    "./audioDB",
    COM_QUERY,    queryType, // Need to pass a parameter
    COM_DATABASE, dbName,
    COM_FEATURES, qKey,
    COM_KEYLIST,  keyList==0 ? emptyArg : keyList,
    COM_TIMES,    timesFileName==0 ? emptyArg : timesFileName,
    COM_QPOINT,   qPosStr,
    COM_POINTNN,  pointNNStr,
    COM_TRACKNN,  trackNNStr, // Need to pass a parameter
    COM_SEQLEN,   seqLenStr
  };

  // Program name plus nine option/value pairs: 19 entries
  const unsigned argc = sizeof(argv)/sizeof(argv[0]);
  audioDB(argc, (char* const*)argv, &adbQueryResult);
  return SOAP_OK;
}

// Program entry point: the whole command is executed by constructing a
// temporary audioDB object from the command-line arguments.
//
// The signature is the standard int main(int, char*[]); the previous form with
// an unsigned first parameter is not one every compiler accepts. argc is never
// negative, so the conversion to the unsigned count audioDB expects is lossless.
int main(int argc, char* argv[]){
  audioDB(static_cast<unsigned>(argc), argv);
  return 0;
}