Mercurial > hg > audiodb

/* audioDB.cpp

audioDB version 1.0

A feature vector database management system for content-based retrieval.

Usage: audioDB [OPTIONS]...

      --full-help              Print help, including hidden options, and exit
  -V, --version                Print version and exit
  -H, --help                   print help on audioDB usage and exit.
  -v, --verbosity=detail       level of detail of operational information.
                                 (default=`1')

Database Setup:
  All database operations require a database argument.

  Database commands are UPPER CASE. Command options are lower case.

  -d, --database=filename      database file required by Database commands.
  -N, --NEW                    make a new (initially empty) database.
  -S, --STATUS                 output database information to stdout.
  -D, --DUMP                   output all entries: index key size.
  -L, --L2NORM                 unit norm vectors and norm all future inserts.

Database Insertion:
  The following commands insert feature files, with optional keys and
  timestamps.

  -I, --INSERT                 add feature vectors to an existing database.
  -U, --UPDATE                 replace inserted vectors associated with key
                                 with new input vectors.
  -f, --features=filename      binary series of vectors file {int sz:ieee
                                 double[][sz]:eof}.
  -t, --times=filename         list of time points (ascii) for feature vectors.
  -k, --key=identifier         unique identifier associated with features.

  -B, --BATCHINSERT            add feature vectors named in a --featureList
                                 file (with optional keys in a --keyList file)
                                 to the named database.
  -F, --featureList=filename   text file containing list of binary feature
                                 vector files to process
  -T, --timesList=filename     text file containing list of ascii --times for
                                 each --features file in --featureList.
  -K, --keyList=filename       text file containing list of unique identifiers
                                 associated with --features.

Database Search:
  Thse commands control the retrieval behaviour.

  -Q, --QUERY=searchtype       content-based search on --database using
                                 --features as a query. Optionally restrict the
                                 search to those tracks identified in a
                                 --keyList.  (possible values="point",
                                 "track", "sequence")
  -p, --qpoint=position        ordinal position of query start point in
                                 --features file.  (default=`0')
  -e, --exhaustive             exhaustive search: iterate through all query
                                 vectors in search. Overrides --qpoint.
                                 (default=off)
  -n, --pointnn=numpoints      number of point nearest neighbours to use in
                                 retrieval.  (default=`10')
  -R, --radius=DOUBLE          radius search, returns all
                                 points/tracks/sequences inside given radius.
                                  (default=`1.0')
  -x, --expandfactor=DOUBLE    time compress/expand factor of result length to
                                 query length [1.0 .. 100.0].  (default=`1.1')
  -o, --rotate                 rotate query vectors for rotationally invariant
                                 search.  (default=off)
  -r, --resultlength=length    maximum length of the result list.
                                 (default=`10')
  -l, --sequencelength=length  length of sequences for sequence search.
                                 (default=`16')
  -h, --sequencehop=hop        hop size of sequence window for sequence search.
                                  (default=`1')

Web Services:
  These commands enable the database process to establish a connection via the
  internet and operate as separate client and server processes.

  -s, --SERVER=port            run as standalone web service on named port.
                                 (default=`80011')
  -c, --client=hostname:port   run as a client using named host service.

  Copyright (C) 2007 Michael Casey, Goldsmiths, University of London

  outputs:

  key1 distance1 qpos1 spos1
  key2 distance2 qpos2 spos2
  ...
  keyN distanceN qposN sposN

*/

#include "audioDB.h"

#define O2_DEBUG

void audioDB::error(const char* a, const char* b, const char *sysFunc) {
  cerr << a << ": " << b << endl;
  if (sysFunc) {
    perror(sysFunc);
  }
  exit(1);
}

audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult):
  dim(0),
  dbName(0),
  inFile(0),
  key(0),
  trackFile(0),
  trackFileName(0),
  timesFile(0),
  timesFileName(0),
  usingTimes(0),
  command(0),
  dbfid(0),
  db(0),
  dbH(0),
  infid(0),
  indata(0),
  queryType(O2_FLAG_POINT_QUERY),
  verbosity(1),
  pointNN(O2_DEFAULT_POINTNN),
  trackNN(O2_DEFAULT_TRACKNN),
  trackTable(0),
  fileTable(0),
  dataBuf(0),
  l2normTable(0),
  timesTable(0),
  qNorm(0),
  sequenceLength(16),
  sequenceHop(1),
  queryPoint(0),
  usingQueryPoint(0),
  isClient(0),
  isServer(0),
  port(0),
  timesTol(0.1),
  radius(0){

  if(processArgs(argc, argv)<0){
    printf("No command found.\n");
    cmdline_parser_print_version ();
    if (strlen(gengetopt_args_info_purpose) > 0)
      printf("%s\n", gengetopt_args_info_purpose);
    printf("%s\n", gengetopt_args_info_usage);
    printf("%s\n", gengetopt_args_info_help[1]);
    printf("%s\n", gengetopt_args_info_help[2]);
    printf("%s\n", gengetopt_args_info_help[0]);
    exit(1);
  }

  if(O2_ACTION(COM_SERVER))
    startServer();

  else  if(O2_ACTION(COM_CREATE))
    create(dbName);

  else if(O2_ACTION(COM_INSERT))
    insert(dbName, inFile);

  else if(O2_ACTION(COM_BATCHINSERT))
    batchinsert(dbName, inFile);

  else if(O2_ACTION(COM_QUERY))
    if(isClient)
      ws_query(dbName, inFile, (char*)hostport);
    else
      query(dbName, inFile, adbQueryResult);

  else if(O2_ACTION(COM_STATUS))
    if(isClient)
      ws_status(dbName,(char*)hostport);
    else
      status(dbName);

  else if(O2_ACTION(COM_L2NORM))
    l2norm(dbName);

  else if(O2_ACTION(COM_DUMP))
    dump(dbName);

  else
    error("Unrecognized command",command);
}

audioDB::~audioDB(){
  // Clean up
  if(indata)
    munmap(indata,statbuf.st_size);
  if(db)
    munmap(db,O2_DEFAULTDBSIZE);
  if(dbfid>0)
    close(dbfid);
  if(infid>0)
    close(infid);
  if(dbH)
    delete dbH;
}

int audioDB::processArgs(const unsigned argc, char* const argv[]){

  if(argc<2){
    cmdline_parser_print_version ();
    if (strlen(gengetopt_args_info_purpose) > 0)
      printf("%s\n", gengetopt_args_info_purpose);
    printf("%s\n", gengetopt_args_info_usage);
    printf("%s\n", gengetopt_args_info_help[1]);
    printf("%s\n", gengetopt_args_info_help[2]);
    printf("%s\n", gengetopt_args_info_help[0]);
    exit(0);
  }

  if (cmdline_parser (argc, argv, &args_info) != 0)
    exit(1) ;

  if(args_info.help_given){
    cmdline_parser_print_help();
    exit(0);
  }

  if(args_info.verbosity_given){
    verbosity=args_info.verbosity_arg;
    if(verbosity<0 || verbosity>10){
      cerr << "Warning: verbosity out of range, setting to 1" << endl;
      verbosity=1;
    }
  }

  if(args_info.radius_given){
    radius=args_info.radius_arg;
    if(radius<=0 || radius>1000000000){
      cerr << "Warning: radius out of range" << endl;
      exit(1);
    }
    else
      if(verbosity>3)
	cerr << "Setting radius to " << radius << endl;
  }

  if(args_info.SERVER_given){
    command=COM_SERVER;
    port=args_info.SERVER_arg;
    if(port<100 || port > 100000)
      error("port out of range");
    isServer=1;
    return 0;
  }

  // No return on client command, find database command
 if(args_info.client_given){
   command=COM_CLIENT;
   hostport=args_info.client_arg;
   isClient=1;
 }

 if(args_info.NEW_given){
   command=COM_CREATE;
   dbName=args_info.database_arg;
   return 0;
 }

 if(args_info.STATUS_given){
   command=COM_STATUS;
   dbName=args_info.database_arg;
   return 0;
 }

 if(args_info.DUMP_given){
   command=COM_DUMP;
   dbName=args_info.database_arg;
   return 0;
 }

 if(args_info.L2NORM_given){
   command=COM_L2NORM;
   dbName=args_info.database_arg;
   return 0;
 }

 if(args_info.INSERT_given){
   command=COM_INSERT;
   dbName=args_info.database_arg;
   inFile=args_info.features_arg;
   if(args_info.key_given)
     key=args_info.key_arg;
   if(args_info.times_given){
     timesFileName=args_info.times_arg;
     if(strlen(timesFileName)>0){
       if(!(timesFile = new ifstream(timesFileName,ios::in)))
	 error("Could not open times file for reading", timesFileName);
       usingTimes=1;
     }
   }
   return 0;
 }

 if(args_info.BATCHINSERT_given){
   command=COM_BATCHINSERT;
   dbName=args_info.database_arg;
   inFile=args_info.featureList_arg;
   if(args_info.keyList_given)
     key=args_info.keyList_arg; // INCONSISTENT NO CHECK

   /* TO DO: REPLACE WITH
      if(args_info.keyList_given){
      trackFileName=args_info.keyList_arg;
      if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in)))
      error("Could not open keyList file for reading",trackFileName);
      }
      AND UPDATE BATCHINSERT()
   */

   if(args_info.timesList_given){
     timesFileName=args_info.timesList_arg;
     if(strlen(timesFileName)>0){
       if(!(timesFile = new ifstream(timesFileName,ios::in)))
	 error("Could not open timesList file for reading", timesFileName);
       usingTimes=1;
     }
   }
   return 0;
 }

 // Query command and arguments
 if(args_info.QUERY_given){
   command=COM_QUERY;
   dbName=args_info.database_arg;
   inFile=args_info.features_arg;

   if(args_info.keyList_given){
     trackFileName=args_info.keyList_arg;
     if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in)))
       error("Could not open keyList file for reading",trackFileName);
   }

   if(args_info.times_given){
     timesFileName=args_info.times_arg;
     if(strlen(timesFileName)>0){
       if(!(timesFile = new ifstream(timesFileName,ios::in)))
	 error("Could not open times file for reading", timesFileName);
       usingTimes=1;
     }
   }

   // query type
   if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0)
     queryType=O2_FLAG_TRACK_QUERY;
   else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0)
     queryType=O2_FLAG_POINT_QUERY;
   else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0)
     queryType=O2_FLAG_SEQUENCE_QUERY;
   else
     error("unsupported query type",args_info.QUERY_arg);

   if(!args_info.exhaustive_flag){
     queryPoint = args_info.qpoint_arg;
     usingQueryPoint=1;
     if(queryPoint<0 || queryPoint >10000)
       error("queryPoint out of range: 0 <= queryPoint <= 10000");
   }


   pointNN=args_info.pointnn_arg;
   if(pointNN<1 || pointNN >1000)
     error("pointNN out of range: 1 <= pointNN <= 1000");


   trackNN=args_info.resultlength_arg;
   if(trackNN<1 || trackNN >10000)
     error("resultlength out of range: 1 <= resultlength <= 1000");


   sequenceLength=args_info.sequencelength_arg;
   if(sequenceLength<1 || sequenceLength >1000)
     error("seqlen out of range: 1 <= seqlen <= 1000");

   sequenceHop=args_info.sequencehop_arg;
   if(sequenceHop<1 || sequenceHop >1000)
     error("seqhop out of range: 1 <= seqhop <= 1000");

   return 0;
 }
 return -1; // no command found
}

/* Make a new database

   The database consists of:

   header
   ---------------------------------------------------------------------------------
   | magic 4 bytes| numFiles 4 bytes | dim 4 bytes | length 4 bytes |flags 4 bytes |
   ---------------------------------------------------------------------------------


   keyTable : list of keys of tracks
   --------------------------------------------------------------------------
   | key 256 bytes                                                          |
   --------------------------------------------------------------------------
   O2_MAXFILES*02_FILENAMELENGTH

   trackTable : Maps implicit feature index to a feature vector matrix
   --------------------------------------------------------------------------
   | numVectors (4 bytes)                                                   |
   --------------------------------------------------------------------------
   O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(INT)

   featureTable
   --------------------------------------------------------------------------
   | v1 v2 v3 ... vd (double)                                               |
   --------------------------------------------------------------------------
   O2_MAXFILES * 02_MEANNUMFEATURES * DIM * sizeof(DOUBLE)

   timesTable
   --------------------------------------------------------------------------
   | timestamp (double)                                                     |
   --------------------------------------------------------------------------
   O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(DOUBLE)

   l2normTable
   --------------------------------------------------------------------------
   | nm (double)                                                            |
   --------------------------------------------------------------------------
   O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(DOUBLE)

*/

void audioDB::get_lock(int fd, bool exclusive) {
  struct flock lock;
  int status;

  lock.l_type = exclusive ? F_WRLCK : F_RDLCK;
  lock.l_whence = SEEK_SET;
  lock.l_start = 0;
  lock.l_len = 0; /* "the whole file" */

 retry:
  do {
    status = fcntl(fd, F_SETLKW, &lock);
  } while (status != 0 && errno == EINTR);

  if (status) {
    if (errno == EAGAIN) {
      sleep(1);
      goto retry;
    } else {
      error("fcntl lock error", "", "fcntl");
    }
  }
}

void audioDB::release_lock(int fd) {
  struct flock lock;
  int status;

  lock.l_type = F_UNLCK;
  lock.l_whence = SEEK_SET;
  lock.l_start = 0;
  lock.l_len = 0;

  status = fcntl(fd, F_SETLKW, &lock);

  if (status)
    error("fcntl unlock error", "", "fcntl");
}

void audioDB::create(const char* dbName){
  if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0)
    error("Can't create database file", dbName, "open");
  get_lock(dbfid, 1);

  // go to the location corresponding to the last byte
  if (lseek (dbfid, O2_DEFAULTDBSIZE - 1, SEEK_SET) == -1)
    error("lseek error in db file", "", "lseek");

  // write a dummy byte at the last location
  if (write (dbfid, "", 1) != 1)
    error("write error", "", "write");

  // mmap the output file
  if(verbosity)
    cerr << "header size:" << O2_HEADERSIZE << endl;
  if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
			 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
    error("mmap error for creating database", "", "mmap");

  dbH = new dbTableHeaderT();
  assert(dbH);

  // Initialize header
  dbH->magic=O2_MAGIC;
  dbH->numFiles=0;
  dbH->length=0;
  dbH->dim=0;
  dbH->flags=0; //O2_FLAG_L2NORM;

  memcpy (db, dbH, O2_HEADERSIZE);
  if(verbosity)
    cerr << COM_CREATE << " " << dbName << endl;

}


void audioDB::drop(){


}

// initTables - memory map files passed as arguments
// Precondition: database has already been created
void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile=0){
  if ((dbfid = open (dbName, forWrite ? O_RDWR : O_RDONLY)) < 0)
    error("Can't open database file", dbName, "open");
  get_lock(dbfid, forWrite);

  // open the input file
  if (inFile && (infid = open (inFile, O_RDONLY)) < 0)
    error("can't open input file for reading", inFile, "open");

  // find size of input file
  if (inFile && fstat (infid,&statbuf) < 0)
    error("fstat error finding size of input", "", "fstat");

  // Get the database header info
  dbH = new dbTableHeaderT();
  assert(dbH);

  if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT))
    error("error reading db header");

  fileTableOffset = O2_HEADERSIZE;
  trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES;
  dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES;
  l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);
  timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);

  if(dbH->magic!=O2_MAGIC){
    cerr << "expected: " << O2_MAGIC << ", got:" << dbH->magic << endl;
    error("database file has incorrect header",dbName);
  }

  if(inFile)
    if(dbH->dim==0 && dbH->length==0) // empty database
      read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality
    else {
      unsigned test;
      read(infid,&test,sizeof(unsigned));
      if(dbH->dim!=test){
	cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <<endl;
	error("feature dimensions do not match database table dimensions");
      }
    }

  // mmap the input file
  if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0))
      == (caddr_t) -1)
    error("mmap error for input", "", "mmap");

  // mmap the database file
  if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0),
			 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
    error("mmap error for initting tables of database", "", "mmap");

  // Make some handy tables with correct types
  fileTable= (char*)(db+fileTableOffset);
  trackTable = (unsigned*)(db+trackTableOffset);
  dataBuf  = (double*)(db+dataoffset);
  l2normTable = (double*)(db+l2normTableOffset);
  timesTable = (double*)(db+timesTableOffset);

}

void audioDB::insert(const char* dbName, const char* inFile){

  initTables(dbName, 1, inFile);

  if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
    error("Must use timestamps with timestamped database","use --times");

  // Check that there is room for at least 1 more file
  if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int)))
    error("No more room in database","insert failed: reason database is full.");

  if(!key)
    key=inFile;
  // Linear scan of filenames check for pre-existing feature
  unsigned alreadyInserted=0;
  for(unsigned k=0; k<dbH->numFiles; k++)
    if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){
      alreadyInserted=1;
      break;
    }

  if(alreadyInserted){
    if(verbosity)
      cerr << "Warning: key already exists in database, ignoring: " <<inFile << endl;
    return;
  }

  // Make a track index table of features to file indexes
  unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
  if(!numVectors){
    if(verbosity)
      cerr << "Warning: ignoring zero-length feature vector file:" << key << endl;
    // CLEAN UP
    munmap(indata,statbuf.st_size);
    munmap(db,O2_DEFAULTDBSIZE);
    close(infid);
    return;
  }

  strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));

  unsigned insertoffset = dbH->length;// Store current state

  // Check times status and insert times from file
  unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
  double* timesdata=timesTable+timesoffset;
  assert(timesdata+numVectors<l2normTable);
  insertTimeStamps(numVectors, timesFile, timesdata);

  // Increment file count
  dbH->numFiles++;

  // Update Header information
  dbH->length+=(statbuf.st_size-sizeof(int));

  // Copy the header back to the database
  memcpy (db, dbH, sizeof(dbTableHeaderT));

  // Update track to file index map
  //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned));
  memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));

  // Update the feature database
  memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int));

  // Norm the vectors on input if the database is already L2 normed
  if(dbH->flags & O2_FLAG_L2NORM)
    unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append

  // Report status
  status(dbName);
  if(verbosity)
    cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors "
	 << (statbuf.st_size-sizeof(int)) << " bytes." << endl;

  // CLEAN UP
  munmap(indata,statbuf.st_size);
  close(infid);
}

void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){
  unsigned numtimes=0;
 if(usingTimes){
   if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles)
     dbH->flags=dbH->flags|O2_FLAG_TIMES;
   else if(!(dbH->flags&O2_FLAG_TIMES)){
     cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl;
     usingTimes=0;
   }

   if(!timesFile->is_open()){
     if(dbH->flags & O2_FLAG_TIMES){
       munmap(indata,statbuf.st_size);
       munmap(db,O2_DEFAULTDBSIZE);
       error("problem opening times file on timestamped database",timesFileName);
     }
     else{
       cerr << "Warning: problem opening times file. But non-timestamped database, so ignoring times file." << endl;
       usingTimes=0;
     }
   }

    // Process time file
   if(usingTimes){
     do{
       *timesFile>>*timesdata++;
       if(timesFile->eof())
	 break;
       numtimes++;
     }while(!timesFile->eof() && numtimes<numVectors);
     if(!timesFile->eof()){
	double dummy;
	do{
	  *timesFile>>dummy;
	  if(timesFile->eof())
	    break;
	  numtimes++;
	}while(!timesFile->eof());
     }
     if(numtimes<numVectors || numtimes>numVectors+2){
       munmap(indata,statbuf.st_size);
       munmap(db,O2_DEFAULTDBSIZE);
       close(infid);
       cerr << "expected " << numVectors << " found " << numtimes << endl;
       error("Times file is incorrect length for features file",inFile);
     }
     if(verbosity>2)
       cerr << "numtimes: " << numtimes << endl;
   }
 }
}

void audioDB::batchinsert(const char* dbName, const char* inFile){

  if ((dbfid = open (dbName, O_RDWR)) < 0)
    error("Can't open database file", dbName, "open");
  get_lock(dbfid, 1);

  if(!key)
    key=inFile;
  ifstream *filesIn = 0;
  ifstream *keysIn = 0;
  ifstream* thisTimesFile = 0;

  if(!(filesIn = new ifstream(inFile)))
    error("Could not open batch in file", inFile);
  if(key && key!=inFile)
    if(!(keysIn = new ifstream(key)))
      error("Could not open batch key file",key);

  // Get the database header info
  dbH = new dbTableHeaderT();
  assert(dbH);

  if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT))
    error("error reading db header");

  if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
    error("Must use timestamps with timestamped database","use --times");

  fileTableOffset = O2_HEADERSIZE;
  trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES;
  dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES;
  l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);
  timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double);

  if(dbH->magic!=O2_MAGIC){
    cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl;
    error("database file has incorrect header",dbName);
  }


  unsigned totalVectors=0;
  char *thisKey = new char[MAXSTR];
  char *thisFile = new char[MAXSTR];
  char *thisTimesFileName = new char[MAXSTR];

  do{
    filesIn->getline(thisFile,MAXSTR);
    if(key && key!=inFile)
      keysIn->getline(thisKey,MAXSTR);
    else
      thisKey = thisFile;
    if(usingTimes)
      timesFile->getline(thisTimesFileName,MAXSTR);

    if(filesIn->eof())
      break;

    // open the input file
    if (thisFile && (infid = open (thisFile, O_RDONLY)) < 0)
      error("can't open feature file for reading", thisFile, "open");

    // find size of input file
    if (thisFile && fstat (infid,&statbuf) < 0)
      error("fstat error finding size of input", "", "fstat");

    // mmap the database file
    if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
			   MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
      error("mmap error for batchinsert into database", "", "mmap");

    // Make some handy tables with correct types
    fileTable= (char*)(db+fileTableOffset);
    trackTable = (unsigned*)(db+trackTableOffset);
    dataBuf  = (double*)(db+dataoffset);
    l2normTable = (double*)(db+l2normTableOffset);
    timesTable = (double*)(db+timesTableOffset);

    // Check that there is room for at least 1 more file
    if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int))))
      error("No more room in database","insert failed: reason database is full.");

    if(thisFile)
      if(dbH->dim==0 && dbH->length==0) // empty database
	read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality
      else {
	unsigned test;
	read(infid,&test,sizeof(unsigned));
	if(dbH->dim!=test){
	  cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <<endl;
	  error("feature dimensions do not match database table dimensions");
	}
      }

    // mmap the input file
    if (thisFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0))
	== (caddr_t) -1)
      error("mmap error for input", "", "mmap");


    // Linear scan of filenames check for pre-existing feature
    unsigned alreadyInserted=0;

    for(unsigned k=0; k<dbH->numFiles; k++)
      if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){
	alreadyInserted=1;
	break;
      }

    if(alreadyInserted){
      if(verbosity)
	cerr << "Warning: key already exists in database:" << thisKey << endl;
    }
    else{

      // Make a track index table of features to file indexes
      unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
      if(!numVectors){
	if(verbosity)
	  cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl;
      }
      else{
	if(usingTimes){
	  if(timesFile->eof())
	    error("not enough timestamp files in timesList");
	  thisTimesFile=new ifstream(thisTimesFileName,ios::in);
	  if(!thisTimesFile->is_open())
	    error("Cannot open timestamp file",thisTimesFileName);
	  unsigned insertoffset=dbH->length;
	  unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double));
	  double* timesdata=timesTable+timesoffset;
	  assert(timesdata+numVectors<l2normTable);
	  insertTimeStamps(numVectors,thisTimesFile,timesdata);
	  if(thisTimesFile)
	    delete thisTimesFile;
	}

	strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));

	unsigned insertoffset = dbH->length;// Store current state

	// Increment file count
	dbH->numFiles++;

	// Update Header information
	dbH->length+=(statbuf.st_size-sizeof(int));
	// Copy the header back to the database
	memcpy (db, dbH, sizeof(dbTableHeaderT));

	// Update track to file index map
	//memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned));
	memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));

	// Update the feature database
	memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int));

	// Norm the vectors on input if the database is already L2 normed
	if(dbH->flags & O2_FLAG_L2NORM)
	  unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append

	totalVectors+=numVectors;
      }
    }
    // CLEAN UP
    munmap(indata,statbuf.st_size);
    close(infid);
    munmap(db,O2_DEFAULTDBSIZE);
  }while(!filesIn->eof());

  // mmap the database file
  if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
			 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
    error("mmap error for creating database", "", "mmap");

  if(verbosity)
    cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors "
	 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl;

  // Report status
  status(dbName);

  munmap(db,O2_DEFAULTDBSIZE);
}

void audioDB::ws_status(const char*dbName, char* hostport){
  struct soap soap;
  int adbStatusResult;

  // Query an existing adb database
  soap_init(&soap);
  if(soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResult)==SOAP_OK)
    std::cout << "result = " << adbStatusResult << std::endl;
  else
    soap_print_fault(&soap,stderr);

  soap_destroy(&soap);
  soap_end(&soap);
  soap_done(&soap);
}

void audioDB::ws_query(const char*dbName, const char *trackKey, const char* hostport){
  struct soap soap;
  adb__queryResult adbQueryResult;

  soap_init(&soap);
  if(soap_call_adb__query(&soap,hostport,NULL,
			  (char*)dbName,(char*)trackKey,(char*)trackFileName,(char*)timesFileName,
			  queryType, queryPoint, pointNN, trackNN, sequenceLength, adbQueryResult)==SOAP_OK){
    //std::cerr << "result list length:" << adbQueryResult.__sizeRlist << std::endl;
    for(int i=0; i<adbQueryResult.__sizeRlist; i++)
      std::cout << adbQueryResult.Rlist[i] << " " << adbQueryResult.Dist[i]
		<< " " << adbQueryResult.Qpos[i] << " " << adbQueryResult.Spos[i] << std::endl;
  }
  else
    soap_print_fault(&soap,stderr);

  soap_destroy(&soap);
  soap_end(&soap);
  soap_done(&soap);

}


void audioDB::status(const char* dbName){
  if(!dbH)
    initTables(dbName, 0, 0);

  // Update Header information
  cout << "num files:" << dbH->numFiles << endl;
  cout << "data dim:" << dbH->dim <<endl;
  if(dbH->dim>0){
    cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl;
    cout << "vectors available:" << (timesTableOffset-(dataoffset+dbH->length))/(sizeof(double)*dbH->dim) << endl;
  }
  cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(timesTableOffset-dataoffset) << "%)" << endl;
  cout << "bytes available:" << timesTableOffset-(dataoffset+dbH->length) << " (" <<
    (100.0*(timesTableOffset-(dataoffset+dbH->length)))/(timesTableOffset-dataoffset) << "%)" << endl;
  cout << "flags:" << dbH->flags << endl;

  unsigned dudCount=0;
  unsigned nullCount=0;
  for(unsigned k=0; k<dbH->numFiles; k++){
    if(trackTable[k]<sequenceLength){
      dudCount++;
      if(!trackTable[k])
	nullCount++;
    }
  }
  cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl;
}


void audioDB::dump(const char* dbName){
  if(!dbH)
    initTables(dbName, 0, 0);

  for(unsigned k=0, j=0; k<dbH->numFiles; k++){
    cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl;
    j+=trackTable[k];
  }

  status(dbName);
}

void audioDB::l2norm(const char* dbName){
  initTables(dbName, true, 0);
  if(dbH->length>0){
    unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
    unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append
  }
  // Update database flags
  dbH->flags = dbH->flags|O2_FLAG_L2NORM;
  memcpy (db, dbH, O2_HEADERSIZE);
}


void audioDB::query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
  switch(queryType){
  case O2_FLAG_POINT_QUERY:
    pointQuery(dbName, inFile, adbQueryResult);
    break;
  case O2_FLAG_SEQUENCE_QUERY:
    if(radius==0)
      trackSequenceQueryNN(dbName, inFile, adbQueryResult);
    else
      trackSequenceQueryRad(dbName, inFile, adbQueryResult);
    break;
  case O2_FLAG_TRACK_QUERY:
    trackPointQuery(dbName, inFile, adbQueryResult);
    break;
  default:
    error("unrecognized queryType in query()");

  }
}

//return ordinal position of key in keyTable
unsigned audioDB::getKeyPos(char* key){
  for(unsigned k=0; k<dbH->numFiles; k++)
    if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0)
      return k;
  error("Key not found",key);
  return O2_ERR_KEYNOTFOUND;
}

// Basic point query engine
void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){

  initTables(dbName, 0, inFile);

  // For each input vector, find the closest pointNN matching output vectors and report
  // we use stdout in this stub version
  unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);

  double* query = (double*)(indata+sizeof(int));
  double* data = dataBuf;
  double* queryCopy = 0;

  if( dbH->flags & O2_FLAG_L2NORM ){
    // Make a copy of the query
    queryCopy = new double[numVectors*dbH->dim];
    qNorm = new double[numVectors];
    assert(queryCopy&&qNorm);
    memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
    unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
    query = queryCopy;
  }

  // Make temporary dynamic memory for results
  assert(pointNN>0 && pointNN<=O2_MAXNN);
  double distances[pointNN];
  unsigned qIndexes[pointNN];
  unsigned sIndexes[pointNN];
  for(unsigned k=0; k<pointNN; k++){
    distances[k]=0.0;
    qIndexes[k]=~0;
    sIndexes[k]=~0;
  }

  unsigned j=numVectors;
  unsigned k,l,n;
  double thisDist;

  unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double));
  double meanQdur = 0;
  double* timesdata = 0;
  double* dbdurs = 0;

  if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
    cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
    usingTimes=0;
  }

  else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
    cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;

  else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
    timesdata = new double[numVectors];
    insertTimeStamps(numVectors, timesFile, timesdata);
    // Calculate durations of points
    for(k=0; k<numVectors-1; k++){
      timesdata[k]=timesdata[k+1]-timesdata[k];
      meanQdur+=timesdata[k];
    }
    meanQdur/=k;
    // Individual exhaustive timepoint durations
    dbdurs = new double[totalVecs];
    for(k=0; k<totalVecs-1; k++)
      dbdurs[k]=timesTable[k+1]-timesTable[k];
    j--; // decrement vector counter by one
  }

  if(usingQueryPoint)
    if(queryPoint>numVectors-1)
      error("queryPoint > numVectors in query");
    else{
      if(verbosity>1)
	cerr << "query point: " << queryPoint << endl; cerr.flush();
      query=query+queryPoint*dbH->dim;
      numVectors=queryPoint+1;
      j=1;
    }

  gettimeofday(&tv1, NULL);
  while(j--){ // query
    data=dataBuf;
    k=totalVecs; // number of database vectors
    while(k--){  // database
      thisDist=0;
      l=dbH->dim;
      double* q=query;
      while(l--)
	thisDist+=*q++**data++;
      if(!usingTimes ||
	 (usingTimes
	  && fabs(dbdurs[totalVecs-k-1]-timesdata[numVectors-j-1])<timesdata[numVectors-j-1]*timesTol)){
	n=pointNN;
	while(n--){
	  if(thisDist>=distances[n]){
	    if((n==0 || thisDist<=distances[n-1])){
	      // Copy all values above up the queue
	      for( l=pointNN-1 ; l >= n+1 ; l--){
		distances[l]=distances[l-1];
		qIndexes[l]=qIndexes[l-1];
		sIndexes[l]=sIndexes[l-1];
	      }
	      distances[n]=thisDist;
	      qIndexes[n]=numVectors-j-1;
	      sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1;
	      break;
	    }
	  }
	  else
	    break;
	}
      }
    }
    // Move query pointer to next query point
    query+=dbH->dim;
  }

  gettimeofday(&tv2, NULL);
  if(verbosity>1)
    cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;

  if(adbQueryResult==0){
    // Output answer
    // Loop over nearest neighbours
    for(k=0; k < pointNN; k++){
      // Scan for key
      unsigned cumTrack=0;
      for(l=0 ; l<dbH->numFiles; l++){
	cumTrack+=trackTable[l];
	if(sIndexes[k]<cumTrack){
	  cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " "
	       << sIndexes[k]+trackTable[l]-cumTrack << endl;
	  break;
	}
      }
    }
  }
  else{ // Process Web Services Query
    int listLen = pointNN;
    adbQueryResult->__sizeRlist=listLen;
    adbQueryResult->__sizeDist=listLen;
    adbQueryResult->__sizeQpos=listLen;
    adbQueryResult->__sizeSpos=listLen;
    adbQueryResult->Rlist= new char*[listLen];
    adbQueryResult->Dist = new double[listLen];
    adbQueryResult->Qpos = new int[listLen];
    adbQueryResult->Spos = new int[listLen];
    for(k=0; k<adbQueryResult->__sizeRlist; k++){
      adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
      adbQueryResult->Dist[k]=distances[k];
      adbQueryResult->Qpos[k]=qIndexes[k];
      unsigned cumTrack=0;
      for(l=0 ; l<dbH->numFiles; l++){
	cumTrack+=trackTable[l];
	if(sIndexes[k]<cumTrack){
	  sprintf(adbQueryResult->Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE);
	  break;
	}
      }
      adbQueryResult->Spos[k]=sIndexes[k]+trackTable[l]-cumTrack;
    }
  }

  // Clean up
  if(queryCopy)
    delete queryCopy;
  if(qNorm)
    delete qNorm;
  if(timesdata)
    delete timesdata;
  if(dbdurs)
    delete dbdurs;
}

// trackPointQuery
// return the trackNN closest tracks to the query track
// uses average of pointNN points per track
void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){
  initTables(dbName, 0, inFile);

  // For each input vector, find the closest pointNN matching output vectors and report
  unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
  unsigned numTracks = dbH->numFiles;

  double* query = (double*)(indata+sizeof(int));
  double* data = dataBuf;
  double* queryCopy = 0;

  if( dbH->flags & O2_FLAG_L2NORM ){
    // Make a copy of the query
    queryCopy = new double[numVectors*dbH->dim];
    qNorm = new double[numVectors];
    assert(queryCopy&&qNorm);
    memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
    unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
    query = queryCopy;
  }

  assert(pointNN>0 && pointNN<=O2_MAXNN);
  assert(trackNN>0 && trackNN<=O2_MAXNN);

  // Make temporary dynamic memory for results
  double trackDistances[trackNN];
  unsigned trackIDs[trackNN];
  unsigned trackQIndexes[trackNN];
  unsigned trackSIndexes[trackNN];

  double distances[pointNN];
  unsigned qIndexes[pointNN];
  unsigned sIndexes[pointNN];

  unsigned j=numVectors; // number of query points
  unsigned k,l,n, track, trackOffset=0, processedTracks=0;
  double thisDist;

  for(k=0; k<pointNN; k++){
    distances[k]=0.0;
    qIndexes[k]=~0;
    sIndexes[k]=~0;
  }

  for(k=0; k<trackNN; k++){
    trackDistances[k]=0.0;
    trackQIndexes[k]=~0;
    trackSIndexes[k]=~0;
    trackIDs[k]=~0;
  }

  double meanQdur = 0;
  double* timesdata = 0;
  double* meanDBdur = 0;

  if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
    cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
    usingTimes=0;
  }

  else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
    cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;

  else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
    timesdata = new double[numVectors];
    insertTimeStamps(numVectors, timesFile, timesdata);
    // Calculate durations of points
    for(k=0; k<numVectors-1; k++){
      timesdata[k]=timesdata[k+1]-timesdata[k];
      meanQdur+=timesdata[k];
    }
    meanQdur/=k;
    meanDBdur = new double[dbH->numFiles];
    for(k=0; k<dbH->numFiles; k++){
      meanDBdur[k]=0.0;
      for(j=0; j<trackTable[k]-1 ; j++)
	meanDBdur[k]+=timesTable[j+1]-timesTable[j];
      meanDBdur[k]/=j;
    }
  }

  if(usingQueryPoint)
    if(queryPoint>numVectors-1)
      error("queryPoint > numVectors in query");
    else{
      if(verbosity>1)
	cerr << "query point: " << queryPoint << endl; cerr.flush();
      query=query+queryPoint*dbH->dim;
      numVectors=queryPoint+1;
    }

  // build track offset table
  unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
  unsigned cumTrack=0;
  unsigned trackIndexOffset;
  for(k=0; k<dbH->numFiles;k++){
    trackOffsetTable[k]=cumTrack;
    cumTrack+=trackTable[k]*dbH->dim;
  }

  char nextKey[MAXSTR];

  gettimeofday(&tv1, NULL);

  for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
    if(trackFile){
      if(!trackFile->eof()){
	trackFile->getline(nextKey,MAXSTR);
	track=getKeyPos(nextKey);
      }
      else
	break;
    }
    trackOffset=trackOffsetTable[track];     // numDoubles offset
    trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
    if(verbosity>7)
      cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush();

    if(dbH->flags & O2_FLAG_L2NORM)
      usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy;
    else
      usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int));
    if(usingQueryPoint)
      j=1;
    else
      j=numVectors;
    while(j--){
      k=trackTable[track];  // number of vectors in track
      data=dataBuf+trackOffset; // data for track
      while(k--){
	thisDist=0;
	l=dbH->dim;
	double* q=query;
	while(l--)
	  thisDist+=*q++**data++;
	if(!usingTimes ||
	   (usingTimes
	    && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
	  n=pointNN;
	  while(n--){
	    if(thisDist>=distances[n]){
	      if((n==0 || thisDist<=distances[n-1])){
		// Copy all values above up the queue
		for( l=pointNN-1 ; l > n ; l--){
		  distances[l]=distances[l-1];
		  qIndexes[l]=qIndexes[l-1];
		  sIndexes[l]=sIndexes[l-1];
		}
		distances[n]=thisDist;
		qIndexes[n]=numVectors-j-1;
		sIndexes[n]=trackTable[track]-k-1;
		break;
	      }
	    }
	    else
	      break;
	  }
	}
      } // track
      // Move query pointer to next query point
      query+=dbH->dim;
    } // query
    // Take the average of this track's distance
    // Test the track distances
    thisDist=0;
    n=pointNN;
    while(n--)
      thisDist+=distances[pointNN-n-1];
    thisDist/=pointNN;
    n=trackNN;
    while(n--){
      if(thisDist>=trackDistances[n]){
	if((n==0 || thisDist<=trackDistances[n-1])){
	  // Copy all values above up the queue
	  for( l=pointNN-1 ; l > n ; l--){
	    trackDistances[l]=trackDistances[l-1];
	    trackQIndexes[l]=trackQIndexes[l-1];
	    trackSIndexes[l]=trackSIndexes[l-1];
	    trackIDs[l]=trackIDs[l-1];
	  }
	  trackDistances[n]=thisDist;
	  trackQIndexes[n]=qIndexes[0];
	  trackSIndexes[n]=sIndexes[0];
	  trackIDs[n]=track;
	  break;
	}
      }
      else
	break;
    }
    for(unsigned k=0; k<pointNN; k++){
      distances[k]=0.0;
      qIndexes[k]=~0;
      sIndexes[k]=~0;
    }
  } // tracks
  gettimeofday(&tv2, NULL);

  if(verbosity>1)
    cerr << endl << "processed tracks :" << processedTracks
	 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;

  if(adbQueryResult==0){
    if(verbosity>1)
      cerr<<endl;
    // Output answer
    // Loop over nearest neighbours
    for(k=0; k < min(trackNN,processedTracks); k++)
      cout << fileTable+trackIDs[k]*O2_FILETABLESIZE
	   << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << endl;
  }
  else{ // Process Web Services Query
    int listLen = min(trackNN, processedTracks);
    adbQueryResult->__sizeRlist=listLen;
    adbQueryResult->__sizeDist=listLen;
    adbQueryResult->__sizeQpos=listLen;
    adbQueryResult->__sizeSpos=listLen;
    adbQueryResult->Rlist= new char*[listLen];
    adbQueryResult->Dist = new double[listLen];
    adbQueryResult->Qpos = new int[listLen];
    adbQueryResult->Spos = new int[listLen];
    for(k=0; k<adbQueryResult->__sizeRlist; k++){
      adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
      adbQueryResult->Dist[k]=trackDistances[k];
      adbQueryResult->Qpos[k]=trackQIndexes[k];
      adbQueryResult->Spos[k]=trackSIndexes[k];
      sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
    }
  }


  // Clean up
  if(trackOffsetTable)
    delete trackOffsetTable;
  if(queryCopy)
    delete queryCopy;
  if(qNorm)
    delete qNorm;
  if(timesdata)
    delete timesdata;
  if(meanDBdur)
    delete meanDBdur;

}


// k nearest-neighbor (k-NN) search between query and target tracks
// efficient implementation based on matched filter
// assumes normed shingles
// outputs distances of retrieved shingles, max retreived = pointNN shingles per per track
void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){

  initTables(dbName, 0, inFile);

  // For each input vector, find the closest pointNN matching output vectors and report
  // we use stdout in this stub version
  unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
  unsigned numTracks = dbH->numFiles;

  double* query = (double*)(indata+sizeof(int));
  double* data = dataBuf;
  double* queryCopy = 0;

  double qMeanL2;
  double* sMeanL2;

  unsigned USE_THRESH=0;
  double SILENCE_THRESH=0;
  double DIFF_THRESH=0;

  if(!(dbH->flags & O2_FLAG_L2NORM) )
    error("Database must be L2 normed for sequence query","use -l2norm");

  if(verbosity>1)
    cerr << "performing norms ... "; cerr.flush();
  unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);

  // Make a copy of the query
  queryCopy = new double[numVectors*dbH->dim];
  memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
  qNorm = new double[numVectors];
  sNorm = new double[dbVectors];
  sMeanL2=new double[dbH->numFiles];
  assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength);
  unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
  query = queryCopy;

  // Make norm measurements relative to sequenceLength
  unsigned w = sequenceLength-1;
  unsigned i,j;
  double* ps;
  double tmp1,tmp2;

  // Copy the L2 norm values to core to avoid disk random access later on
  memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
  double* snPtr = sNorm;
  for(i=0; i<dbH->numFiles; i++){
    if(trackTable[i]>=sequenceLength){
      tmp1=*snPtr;
      j=1;
      w=sequenceLength-1;
      while(w--)
	*snPtr+=snPtr[j++];
      ps = snPtr+1;
      w=trackTable[i]-sequenceLength; // +1 - 1
      while(w--){
	tmp2=*ps;
	*ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
	tmp1=tmp2;
	ps++;
      }
      ps = snPtr;
      w=trackTable[i]-sequenceLength+1;
      while(w--){
	*ps=sqrt(*ps);
	ps++;
      }
    }
    snPtr+=trackTable[i];
  }

  double* pn = sMeanL2;
  w=dbH->numFiles;
  while(w--)
    *pn++=0.0;
  ps=sNorm;
  unsigned processedTracks=0;
  for(i=0; i<dbH->numFiles; i++){
    if(trackTable[i]>sequenceLength-1){
      w = trackTable[i]-sequenceLength;
      pn = sMeanL2+i;
      *pn=0;
      while(w--)
	if(*ps>0)
	  *pn+=*ps++;
      *pn/=trackTable[i]-sequenceLength;
      SILENCE_THRESH+=*pn;
      processedTracks++;
    }
    ps = sNorm + trackTable[i];
  }
  if(verbosity>1)
    cerr << "processedTracks: " << processedTracks << endl;


  SILENCE_THRESH/=processedTracks;
  USE_THRESH=1; // Turn thresholding on
  DIFF_THRESH=SILENCE_THRESH; //  mean shingle power
  SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE
  if(verbosity>4)
    cerr << "silence thresh: " << SILENCE_THRESH;
  w=sequenceLength-1;
  i=1;
  tmp1=*qNorm;
  while(w--)
    *qNorm+=qNorm[i++];
  ps = qNorm+1;
  w=numVectors-sequenceLength; // +1 -1
  while(w--){
    tmp2=*ps;
    *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
    tmp1=tmp2;
    ps++;
  }
  ps = qNorm;
  qMeanL2 = 0;
  w=numVectors-sequenceLength+1;
  while(w--){
    *ps=sqrt(*ps);
    qMeanL2+=*ps++;
  }
  qMeanL2 /= numVectors-sequenceLength+1;

  if(verbosity>1)
    cerr << "done." << endl;


  if(verbosity>1)
    cerr << "matching tracks..." << endl;

  assert(pointNN>0 && pointNN<=O2_MAXNN);
  assert(trackNN>0 && trackNN<=O2_MAXNN);

  // Make temporary dynamic memory for results
  double trackDistances[trackNN];
  unsigned trackIDs[trackNN];
  unsigned trackQIndexes[trackNN];
  unsigned trackSIndexes[trackNN];

  double distances[pointNN];
  unsigned qIndexes[pointNN];
  unsigned sIndexes[pointNN];


  unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
  double thisDist;
  double oneOverWL=1.0/wL;

  for(k=0; k<pointNN; k++){
    distances[k]=1.0e6;
    qIndexes[k]=~0;
    sIndexes[k]=~0;
  }

  for(k=0; k<trackNN; k++){
    trackDistances[k]=1.0e6;
    trackQIndexes[k]=~0;
    trackSIndexes[k]=~0;
    trackIDs[k]=~0;
  }

  // Timestamp and durations processing
  double meanQdur = 0;
  double* timesdata = 0;
  double* meanDBdur = 0;

  if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
    cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
    usingTimes=0;
  }

  else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
    cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;

  else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
    timesdata = new double[numVectors];
    assert(timesdata);
    insertTimeStamps(numVectors, timesFile, timesdata);
    // Calculate durations of points
    for(k=0; k<numVectors-1; k++){
      timesdata[k]=timesdata[k+1]-timesdata[k];
      meanQdur+=timesdata[k];
    }
    meanQdur/=k;
    if(verbosity>1)
      cerr << "mean query file duration: " << meanQdur << endl;
    meanDBdur = new double[dbH->numFiles];
    assert(meanDBdur);
    for(k=0; k<dbH->numFiles; k++){
      meanDBdur[k]=0.0;
      for(j=0; j<trackTable[k]-1 ; j++)
	meanDBdur[k]+=timesTable[j+1]-timesTable[j];
      meanDBdur[k]/=j;
    }
  }

  if(usingQueryPoint)
    if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
      error("queryPoint > numVectors-wL+1 in query");
    else{
      if(verbosity>1)
	cerr << "query point: " << queryPoint << endl; cerr.flush();
      query=query+queryPoint*dbH->dim;
      qNorm=qNorm+queryPoint;
      numVectors=wL;
    }

  double ** D = 0;    // Differences query and target
  double ** DD = 0;   // Matched filter distance

  D = new double*[numVectors];
  assert(D);
  DD = new double*[numVectors];
  assert(DD);

  gettimeofday(&tv1, NULL);
  processedTracks=0;
  unsigned successfulTracks=0;

  double* qp;
  double* sp;
  double* dp;
  double diffL2;

  // build track offset table
  unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
  unsigned cumTrack=0;
  unsigned trackIndexOffset;
  for(k=0; k<dbH->numFiles;k++){
    trackOffsetTable[k]=cumTrack;
    cumTrack+=trackTable[k]*dbH->dim;
  }

  char nextKey [MAXSTR];

  // chi^2 statistics
  double sampleCount = 0;
  double sampleSum = 0;
  double logSampleSum = 0;
  double minSample = 1e9;
  double maxSample = 0;

  // Track loop
  for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){

    // get trackID from file if using a control file
    if(trackFile){
      if(!trackFile->eof()){
	trackFile->getline(nextKey,MAXSTR);
	track=getKeyPos(nextKey);
      }
      else
	break;
    }

    trackOffset=trackOffsetTable[track];     // numDoubles offset
    trackIndexOffset=trackOffset/dbH->dim; // numVectors offset

    if(sequenceLength<trackTable[track]){  // test for short sequences

      if(verbosity>7)
	cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();

      // Sum products matrix
      for(j=0; j<numVectors;j++){
	D[j]=new double[trackTable[track]];
	assert(D[j]);

      }

      // Matched filter matrix
      for(j=0; j<numVectors;j++){
	DD[j]=new double[trackTable[track]];
	assert(DD[j]);
      }

      double tmp;
      // Dot product
      for(j=0; j<numVectors; j++)
	for(k=0; k<trackTable[track]; k++){
	  qp=query+j*dbH->dim;
	  sp=dataBuf+trackOffset+k*dbH->dim;
	  DD[j][k]=0.0; // Initialize matched filter array
	  dp=&D[j][k];  // point to correlation cell j,k
	  *dp=0.0;      // initialize correlation cell
	  l=dbH->dim;         // size of vectors
	  while(l--)
	    *dp+=*qp++**sp++;
	}

      // Matched Filter
      // HOP SIZE == 1
      double* spd;
      if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
	for(w=0; w<wL; w++)
	  for(j=0; j<numVectors-w; j++){
	    sp=DD[j];
	    spd=D[j+w]+w;
	    k=trackTable[track]-w;
	    while(k--)
	      *sp+++=*spd++;
	  }
      }

      else{ // HOP_SIZE != 1
	for(w=0; w<wL; w++)
	  for(j=0; j<numVectors-w; j+=HOP_SIZE){
	    sp=DD[j];
	    spd=D[j+w]+w;
	    for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
	      *sp+=*spd;
	      sp+=HOP_SIZE;
	      spd+=HOP_SIZE;
	    }
	  }
      }

      if(verbosity>3 && usingTimes){
	cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl;
	cerr.flush();
      }

      if(!usingTimes ||
	 (usingTimes
	  && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){

	if(verbosity>3 && usingTimes){
	  cerr << "within duration tolerance." << endl;
	  cerr.flush();
	}

	// Search for minimum distance by shingles (concatenated vectors)
	for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
	  for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
	    thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
	    if(verbosity>10)
	      cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl;
	    // Gather chi^2 statistics
	    if(thisDist<minSample)
	      minSample=thisDist;
	    else if(thisDist>maxSample)
	      maxSample=thisDist;
	    if(thisDist>1e-9){
	      sampleCount++;
	      sampleSum+=thisDist;
	      logSampleSum+=log(thisDist);
	    }

	    // diffL2 = fabs(qNorm[j] - sNorm[trackIndexOffset+k]);
	    // Power test
	    if(!USE_THRESH ||
	       // Threshold on mean L2 of Q and S sequences
	       (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH &&
		// Are both query and target windows above mean energy?
		(qNorm[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // &&  diffL2 < DIFF_THRESH )))
	      thisDist=thisDist; // Computed above
	    else
	      thisDist=1000000.0;

	    // k-NN match algorithm
	    m=pointNN;
	    while(m--){
	      if(thisDist<=distances[m])
		if(m==0 || thisDist>=distances[m-1]){
		// Shuffle distances up the list
		for(l=pointNN-1; l>m; l--){
		  distances[l]=distances[l-1];
		  qIndexes[l]=qIndexes[l-1];
		  sIndexes[l]=sIndexes[l-1];
		}
		distances[m]=thisDist;
		if(usingQueryPoint)
		  qIndexes[m]=queryPoint;
		else
		  qIndexes[m]=j;
		sIndexes[m]=k;
		break;
		}
	    }
	  }
	// Calculate the mean of the N-Best matches
	thisDist=0.0;
	for(m=0; m<pointNN; m++) {
          if (distances[m] == 1000000.0) break;
	  thisDist+=distances[m];
        }
	thisDist/=m;

	// Let's see the distances then...
	if(verbosity>3)
	  cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl;


	// All the track stuff goes here
	n=trackNN;
	while(n--){
	  if(thisDist<=trackDistances[n]){
	    if((n==0 || thisDist>=trackDistances[n-1])){
	      // Copy all values above up the queue
	      for( l=trackNN-1 ; l > n ; l--){
		trackDistances[l]=trackDistances[l-1];
		trackQIndexes[l]=trackQIndexes[l-1];
		trackSIndexes[l]=trackSIndexes[l-1];
		trackIDs[l]=trackIDs[l-1];
	      }
	      trackDistances[n]=thisDist;
	      trackQIndexes[n]=qIndexes[0];
	      trackSIndexes[n]=sIndexes[0];
	      successfulTracks++;
	      trackIDs[n]=track;
	      break;
	    }
	  }
	  else
	    break;
	}
      } // Duration match

      // Clean up current track
      if(D!=NULL){
	for(j=0; j<numVectors; j++)
	  delete[] D[j];
      }

      if(DD!=NULL){
	for(j=0; j<numVectors; j++)
	  delete[] DD[j];
      }
    }
    // per-track reset array values
    for(unsigned k=0; k<pointNN; k++){
      distances[k]=1.0e6;
      qIndexes[k]=~0;
      sIndexes[k]=~0;
    }
  }

  gettimeofday(&tv2,NULL);
  if(verbosity>1){
    cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
	 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
    cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
	 << " minSample: " << minSample << " maxSample: " << maxSample << endl;
  }
  if(adbQueryResult==0){
    if(verbosity>1)
      cerr<<endl;
    // Output answer
    // Loop over nearest neighbours
    for(k=0; k < min(trackNN,successfulTracks); k++)
      cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << " "
	   << trackQIndexes[k] << " " << trackSIndexes[k] << endl;
  }
  else{ // Process Web Services Query
    int listLen = min(trackNN, processedTracks);
    adbQueryResult->__sizeRlist=listLen;
    adbQueryResult->__sizeDist=listLen;
    adbQueryResult->__sizeQpos=listLen;
    adbQueryResult->__sizeSpos=listLen;
    adbQueryResult->Rlist= new char*[listLen];
    adbQueryResult->Dist = new double[listLen];
    adbQueryResult->Qpos = new int[listLen];
    adbQueryResult->Spos = new int[listLen];
    for(k=0; k<adbQueryResult->__sizeRlist; k++){
      adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
      adbQueryResult->Dist[k]=trackDistances[k];
      adbQueryResult->Qpos[k]=trackQIndexes[k];
      adbQueryResult->Spos[k]=trackSIndexes[k];
      sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
    }
  }


  // Clean up
  if(trackOffsetTable)
    delete[] trackOffsetTable;
  if(queryCopy)
    delete[] queryCopy;
  //if(qNorm)
  //delete qNorm;
  if(D)
    delete[] D;
  if(DD)
    delete[] DD;
  if(timesdata)
    delete[] timesdata;
  if(meanDBdur)
    delete[] meanDBdur;


}

// Radius search between query and target tracks
// efficient implementation based on matched filter
// assumes normed shingles
// outputs count of retrieved shingles, max retreived = one shingle per query shingle per track
void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){

  initTables(dbName, 0, inFile);

  // For each input vector, find the closest pointNN matching output vectors and report
  // we use stdout in this stub version
  unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
  unsigned numTracks = dbH->numFiles;

  double* query = (double*)(indata+sizeof(int));
  double* data = dataBuf;
  double* queryCopy = 0;

  double qMeanL2;
  double* sMeanL2;

  unsigned USE_THRESH=0;
  double SILENCE_THRESH=0;
  double DIFF_THRESH=0;

  if(!(dbH->flags & O2_FLAG_L2NORM) )
    error("Database must be L2 normed for sequence query","use -l2norm");

  if(verbosity>1)
    cerr << "performing norms ... "; cerr.flush();
  unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);

  // Make a copy of the query
  queryCopy = new double[numVectors*dbH->dim];
  memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
  qNorm = new double[numVectors];
  sNorm = new double[dbVectors];
  sMeanL2=new double[dbH->numFiles];
  assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength);
  unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
  query = queryCopy;

  // Make norm measurements relative to sequenceLength
  unsigned w = sequenceLength-1;
  unsigned i,j;
  double* ps;
  double tmp1,tmp2;

  // Copy the L2 norm values to core to avoid disk random access later on
  memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
  double* snPtr = sNorm;
  for(i=0; i<dbH->numFiles; i++){
    if(trackTable[i]>=sequenceLength){
      tmp1=*snPtr;
      j=1;
      w=sequenceLength-1;
      while(w--)
	*snPtr+=snPtr[j++];
      ps = snPtr+1;
      w=trackTable[i]-sequenceLength; // +1 - 1
      while(w--){
	tmp2=*ps;
	*ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
	tmp1=tmp2;
	ps++;
      }
      ps = snPtr;
      w=trackTable[i]-sequenceLength+1;
      while(w--){
	*ps=sqrt(*ps);
	ps++;
      }
    }
    snPtr+=trackTable[i];
  }

  double* pn = sMeanL2;
  w=dbH->numFiles;
  while(w--)
    *pn++=0.0;
  ps=sNorm;
  unsigned processedTracks=0;
  for(i=0; i<dbH->numFiles; i++){
    if(trackTable[i]>sequenceLength-1){
      w = trackTable[i]-sequenceLength;
      pn = sMeanL2+i;
      *pn=0;
      while(w--)
	if(*ps>0)
	  *pn+=*ps++;
      *pn/=trackTable[i]-sequenceLength;
      SILENCE_THRESH+=*pn;
      processedTracks++;
    }
    ps = sNorm + trackTable[i];
  }
  if(verbosity>1)
    cerr << "processedTracks: " << processedTracks << endl;


  SILENCE_THRESH/=processedTracks;
  USE_THRESH=1; // Turn thresholding on
  DIFF_THRESH=SILENCE_THRESH; //  mean shingle power
  SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE
  if(verbosity>4)
    cerr << "silence thresh: " << SILENCE_THRESH;
  w=sequenceLength-1;
  i=1;
  tmp1=*qNorm;
  while(w--)
    *qNorm+=qNorm[i++];
  ps = qNorm+1;
  w=numVectors-sequenceLength; // +1 -1
  while(w--){
    tmp2=*ps;
    *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1);
    tmp1=tmp2;
    ps++;
  }
  ps = qNorm;
  qMeanL2 = 0;
  w=numVectors-sequenceLength+1;
  while(w--){
    *ps=sqrt(*ps);
    qMeanL2+=*ps++;
  }
  qMeanL2 /= numVectors-sequenceLength+1;

  if(verbosity>1)
    cerr << "done." << endl;


  if(verbosity>1)
    cerr << "matching tracks..." << endl;

  assert(pointNN>0 && pointNN<=O2_MAXNN);
  assert(trackNN>0 && trackNN<=O2_MAXNN);

  // Make temporary dynamic memory for results
  double trackDistances[trackNN];
  unsigned trackIDs[trackNN];
  unsigned trackQIndexes[trackNN];
  unsigned trackSIndexes[trackNN];

  double distances[pointNN];
  unsigned qIndexes[pointNN];
  unsigned sIndexes[pointNN];


  unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
  double thisDist;
  double oneOverWL=1.0/wL;

  for(k=0; k<pointNN; k++){
    distances[k]=0.0;
    qIndexes[k]=~0;
    sIndexes[k]=~0;
  }

  for(k=0; k<trackNN; k++){
    trackDistances[k]=0.0;
    trackQIndexes[k]=~0;
    trackSIndexes[k]=~0;
    trackIDs[k]=~0;
  }

  // Timestamp and durations processing
  double meanQdur = 0;
  double* timesdata = 0;
  double* meanDBdur = 0;

  if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
    cerr << "warning: ignoring query timestamps for non-timestamped database" << endl;
    usingTimes=0;
  }

  else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
    cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl;

  else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
    timesdata = new double[numVectors];
    assert(timesdata);
    insertTimeStamps(numVectors, timesFile, timesdata);
    // Calculate durations of points
    for(k=0; k<numVectors-1; k++){
      timesdata[k]=timesdata[k+1]-timesdata[k];
      meanQdur+=timesdata[k];
    }
    meanQdur/=k;
    if(verbosity>1)
      cerr << "mean query file duration: " << meanQdur << endl;
    meanDBdur = new double[dbH->numFiles];
    assert(meanDBdur);
    for(k=0; k<dbH->numFiles; k++){
      meanDBdur[k]=0.0;
      for(j=0; j<trackTable[k]-1 ; j++)
	meanDBdur[k]+=timesTable[j+1]-timesTable[j];
      meanDBdur[k]/=j;
    }
  }

  if(usingQueryPoint)
    if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
      error("queryPoint > numVectors-wL+1 in query");
    else{
      if(verbosity>1)
	cerr << "query point: " << queryPoint << endl; cerr.flush();
      query=query+queryPoint*dbH->dim;
      qNorm=qNorm+queryPoint;
      numVectors=wL;
    }

  double ** D = 0;    // Differences query and target
  double ** DD = 0;   // Matched filter distance

  D = new double*[numVectors];
  assert(D);
  DD = new double*[numVectors];
  assert(DD);

  gettimeofday(&tv1, NULL);
  processedTracks=0;
  unsigned successfulTracks=0;

  double* qp;
  double* sp;
  double* dp;
  double diffL2;

  // build track offset table
  unsigned *trackOffsetTable = new unsigned[dbH->numFiles];
  unsigned cumTrack=0;
  unsigned trackIndexOffset;
  for(k=0; k<dbH->numFiles;k++){
    trackOffsetTable[k]=cumTrack;
    cumTrack+=trackTable[k]*dbH->dim;
  }

  char nextKey [MAXSTR];

  // chi^2 statistics
  double sampleCount = 0;
  double sampleSum = 0;
  double logSampleSum = 0;
  double minSample = 1e9;
  double maxSample = 0;

  // Track loop
  for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){

    // get trackID from file if using a control file
    if(trackFile){
      if(!trackFile->eof()){
	trackFile->getline(nextKey,MAXSTR);
	track=getKeyPos(nextKey);
      }
      else
	break;
    }

    trackOffset=trackOffsetTable[track];     // numDoubles offset
    trackIndexOffset=trackOffset/dbH->dim; // numVectors offset

    if(sequenceLength<trackTable[track]){  // test for short sequences

      if(verbosity>7)
	cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush();

      // Sum products matrix
      for(j=0; j<numVectors;j++){
	D[j]=new double[trackTable[track]];
	assert(D[j]);

      }

      // Matched filter matrix
      for(j=0; j<numVectors;j++){
	DD[j]=new double[trackTable[track]];
	assert(DD[j]);
      }

      double tmp;
      // Dot product
      for(j=0; j<numVectors; j++)
	for(k=0; k<trackTable[track]; k++){
	  qp=query+j*dbH->dim;
	  sp=dataBuf+trackOffset+k*dbH->dim;
	  DD[j][k]=0.0; // Initialize matched filter array
	  dp=&D[j][k];  // point to correlation cell j,k
	  *dp=0.0;      // initialize correlation cell
	  l=dbH->dim;         // size of vectors
	  while(l--)
	    *dp+=*qp++**sp++;
	}

      // Matched Filter
      // HOP SIZE == 1
      double* spd;
      if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
	for(w=0; w<wL; w++)
	  for(j=0; j<numVectors-w; j++){
	    sp=DD[j];
	    spd=D[j+w]+w;
	    k=trackTable[track]-w;
	    while(k--)
	      *sp+++=*spd++;
	  }
      }

      else{ // HOP_SIZE != 1
	for(w=0; w<wL; w++)
	  for(j=0; j<numVectors-w; j+=HOP_SIZE){
	    sp=DD[j];
	    spd=D[j+w]+w;
	    for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
	      *sp+=*spd;
	      sp+=HOP_SIZE;
	      spd+=HOP_SIZE;
	    }
	  }
      }

      if(verbosity>3 && usingTimes){
	cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl;
	cerr.flush();
      }

      if(!usingTimes ||
	 (usingTimes
	  && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){

	if(verbosity>3 && usingTimes){
	  cerr << "within duration tolerance." << endl;
	  cerr.flush();
	}

	// Search for minimum distance by shingles (concatenated vectors)
	for(j=0;j<numVectors-wL;j+=HOP_SIZE)
	  for(k=0;k<trackTable[track]-wL;k+=HOP_SIZE){
	    thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
	    if(verbosity>10)
	      cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl;
	    // Gather chi^2 statistics
	    if(thisDist<minSample)
	      minSample=thisDist;
	    else if(thisDist>maxSample)
	      maxSample=thisDist;
	    if(thisDist>1e-9){
	      sampleCount++;
	      sampleSum+=thisDist;
	      logSampleSum+=log(thisDist);
	    }

	    // diffL2 = fabs(qNorm[j] - sNorm[trackIndexOffset+k]);
	    // Power test
	    if(!USE_THRESH ||
	       // Threshold on mean L2 of Q and S sequences
	       (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH &&
		// Are both query and target windows above mean energy?
		(qNorm[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // &&  diffL2 < DIFF_THRESH )))
	      thisDist=thisDist; // Computed above
	    else
	      thisDist=1000000.0;
	    if(thisDist>=0 && thisDist<=radius){
	      distances[0]++; // increment count
	      break; // only need one track point per query point
	    }
	  }
	// How many points were below threshold ?
	thisDist=distances[0];

	// Let's see the distances then...
	if(verbosity>3)
	  cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl;

	// All the track stuff goes here
	n=trackNN;
	while(n--){
	  if(thisDist>trackDistances[n]){
	    if((n==0 || thisDist<=trackDistances[n-1])){
	      // Copy all values above up the queue
	      for( l=trackNN-1 ; l > n ; l--){
		trackDistances[l]=trackDistances[l-1];
		trackQIndexes[l]=trackQIndexes[l-1];
		trackSIndexes[l]=trackSIndexes[l-1];
		trackIDs[l]=trackIDs[l-1];
	      }
	      trackDistances[n]=thisDist;
	      trackQIndexes[n]=qIndexes[0];
	      trackSIndexes[n]=sIndexes[0];
	      successfulTracks++;
	      trackIDs[n]=track;
	      break;
	    }
	  }
	  else
	    break;
	}
      } // Duration match

      // Clean up current track
      if(D!=NULL){
	for(j=0; j<numVectors; j++)
	  delete[] D[j];
      }

      if(DD!=NULL){
	for(j=0; j<numVectors; j++)
	  delete[] DD[j];
      }
    }
    // per-track reset array values
    for(unsigned k=0; k<pointNN; k++){
      distances[k]=0.0;
      qIndexes[k]=~0;
      sIndexes[k]=~0;
    }
  }

  gettimeofday(&tv2,NULL);
  if(verbosity>1){
    cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
	 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl;
    cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
	 << " minSample: " << minSample << " maxSample: " << maxSample << endl;
  }

  if(adbQueryResult==0){
    if(verbosity>1)
      cerr<<endl;
    // Output answer
    // Loop over nearest neighbours
    for(k=0; k < min(trackNN,successfulTracks); k++)
      cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << endl;
  }
  else{ // Process Web Services Query
    int listLen = min(trackNN, processedTracks);
    adbQueryResult->__sizeRlist=listLen;
    adbQueryResult->__sizeDist=listLen;
    adbQueryResult->__sizeQpos=listLen;
    adbQueryResult->__sizeSpos=listLen;
    adbQueryResult->Rlist= new char*[listLen];
    adbQueryResult->Dist = new double[listLen];
    adbQueryResult->Qpos = new int[listLen];
    adbQueryResult->Spos = new int[listLen];
    for(k=0; k<adbQueryResult->__sizeRlist; k++){
      adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR];
      adbQueryResult->Dist[k]=trackDistances[k];
      adbQueryResult->Qpos[k]=trackQIndexes[k];
      adbQueryResult->Spos[k]=trackSIndexes[k];
      sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
    }
  }


  // Clean up
  if(trackOffsetTable)
    delete[] trackOffsetTable;
  if(queryCopy)
    delete[] queryCopy;
  //if(qNorm)
  //delete qNorm;
  if(D)
    delete[] D;
  if(DD)
    delete[] DD;
  if(timesdata)
    delete[] timesdata;
  if(meanDBdur)
    delete[] meanDBdur;


}

void audioDB::normalize(double* X, int dim, int n){
  unsigned c = n*dim;
  double minval,maxval,v,*p;

  p=X;
  while(c--){
    v=*p++;
    if(v<minval)
      minval=v;
    else if(v>maxval)
      maxval=v;
  }

  normalize(X, dim, n, minval, maxval);

}

void audioDB::normalize(double* X, int dim, int n, double minval, double maxval){
  unsigned c = n*dim;
  double *p;


  if(maxval==minval)
    return;

  maxval=1.0/(maxval-minval);
  c=n*dim;
  p=X;

  while(c--){
    *p=(*p-minval)*maxval;
    p++;
  }
}

// Unit norm block of features
void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
  unsigned d;
  double L2, oneOverL2, *p;
  if(verbosity>2)
    cerr << "norming " << n << " vectors...";cerr.flush();
  while(n--){
    p=X;
    L2=0.0;
    d=dim;
    while(d--){
      L2+=*p**p;
      p++;
    }
    /*    L2=sqrt(L2);*/
    if(qNorm)
      *qNorm++=L2;
    /*
    oneOverL2 = 1.0/L2;
    d=dim;
    while(d--){
      *X*=oneOverL2;
      X++;
    */
    X+=dim;
  }
  if(verbosity>2)
    cerr << "done..." << endl;
}

// Unit norm block of features
void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){
  unsigned d;
  double L2, oneOverL2, *p;
  unsigned nn = n;

  assert(l2normTable);

  if( !append && (dbH->flags & O2_FLAG_L2NORM) )
    error("Database is already L2 normed", "automatic norm on insert is enabled");

  if(verbosity>2)
    cerr << "norming " << n << " vectors...";cerr.flush();

  double* l2buf = new double[n];
  double* l2ptr = l2buf;
  assert(l2buf);
  assert(X);

  while(nn--){
    p=X;
    *l2ptr=0.0;
    d=dim;
    while(d--){
      *l2ptr+=*p**p;
      p++;
    }
    l2ptr++;
    /*
      oneOverL2 = 1.0/(*l2ptr++);
      d=dim;
      while(d--){
      *X*=oneOverL2;
      X++;
      }
    */
    X+=dim;
  }
  unsigned offset;
  if(append)
    offset=dbH->length/(dbH->dim*sizeof(double)); // number of vectors
  else
    offset=0;
  memcpy(l2normTable+offset, l2buf, n*sizeof(double));
  if(l2buf)
    delete[] l2buf;
  if(verbosity>2)
    cerr << "done..." << endl;
}


// Start an audioDB server on the host
void audioDB::startServer(){
  struct soap soap;
  int m, s; // master and slave sockets
  soap_init(&soap);
  m = soap_bind(&soap, NULL, port, 100);
  if (m < 0)
    soap_print_fault(&soap, stderr);
  else
    {
      fprintf(stderr, "Socket connection successful: master socket = %d\n", m);
      for (int i = 1; ; i++)
	{
	  s = soap_accept(&soap);
	  if (s < 0)
	    {
	      soap_print_fault(&soap, stderr);
	      break;
	    }
	  fprintf(stderr, "%d: accepted connection from IP=%d.%d.%d.%d socket=%d\n", i,
		  (soap.ip >> 24)&0xFF, (soap.ip >> 16)&0xFF, (soap.ip >> 8)&0xFF, soap.ip&0xFF, s);
	  if (soap_serve(&soap) != SOAP_OK) // process RPC request
	    soap_print_fault(&soap, stderr); // print error
	  fprintf(stderr, "request served\n");
	  soap_destroy(&soap); // clean up class instances
	  soap_end(&soap); // clean up everything and close socket
	}
    }
  soap_done(&soap); // close master socket and detach environment
}


// web services

// SERVER SIDE
int adb__status(struct soap* soap, xsd__string dbName, xsd__int &adbCreateResult){
  char* const argv[]={"audioDB",COM_STATUS,"-d",dbName};
  const unsigned argc = 4;
  audioDB(argc,argv);
  adbCreateResult=100;
  return SOAP_OK;
}

// Literal translation of command line to web service

int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int trackNN, xsd__int seqLen, adb__queryResult &adbQueryResult){
  char queryType[256];
  for(int k=0; k<256; k++)
    queryType[k]='\0';
  if(qType == O2_FLAG_POINT_QUERY)
    strncpy(queryType, "point", strlen("point"));
  else if (qType == O2_FLAG_SEQUENCE_QUERY)
    strncpy(queryType, "sequence", strlen("sequence"));
  else if(qType == O2_FLAG_TRACK_QUERY)
    strncpy(queryType,"track", strlen("track"));
  else
    strncpy(queryType, "", strlen(""));

  if(pointNN==0)
    pointNN=10;
  if(trackNN==0)
    trackNN=10;
  if(seqLen==0)
    seqLen=16;

  char qPosStr[256];
  sprintf(qPosStr, "%d", qPos);
  char pointNNStr[256];
  sprintf(pointNNStr,"%d",pointNN);
  char trackNNStr[256];
  sprintf(trackNNStr,"%d",trackNN);
  char seqLenStr[256];
  sprintf(seqLenStr,"%d",seqLen);

  const  char* argv[] ={
    "./audioDB",
    COM_QUERY,
    queryType, // Need to pass a parameter
    COM_DATABASE,
    dbName,
    COM_FEATURES,
    qKey,
    COM_KEYLIST,
    keyList==0?"":keyList,
    COM_TIMES,
    timesFileName==0?"":timesFileName,
    COM_QPOINT,
    qPosStr,
    COM_POINTNN,
    pointNNStr,
    COM_TRACKNN,
    trackNNStr, // Need to pass a parameter
    COM_SEQLEN,
    seqLenStr
  };

  const unsigned argc = 19;
  audioDB(argc, (char* const*)argv, &adbQueryResult);
  return SOAP_OK;
}

int main(const unsigned argc, char* const argv[]){
  audioDB(argc, argv);
}
author	mas01cr
date	Thu, 20 Sep 2007 08:50:35 +0000
parents	4d9ea08b2f5f
children	32f8b7845b30