# HG changeset patch # User mas01cr # Date 1191584703 0 # Node ID 1521d46bc1ac046f404536c9254bc378f5780e8e # Parent a1d462d592dd761044bb5e0a4ad3791b712881c2 Merge trunk changes -r96:122 to audiodb-debian branch. (and new version in debian/changelog) diff -r a1d462d592dd -r 1521d46bc1ac audioDB.cpp --- a/audioDB.cpp Mon Oct 01 14:59:03 2007 +0000 +++ b/audioDB.cpp Fri Oct 05 11:45:03 2007 +0000 @@ -1,9 +1,21 @@ #include "audioDB.h" -#define O2_DEBUG +#if defined(O2_DEBUG) +void sigterm_action(int signal, siginfo_t *info, void *context) { + exit(128+signal); +} + +void sighup_action(int signal, siginfo_t *info, void *context) { + // FIXME: reread any configuration files +} +#endif void audioDB::error(const char* a, const char* b, const char *sysFunc) { if(isServer) { + /* FIXME: I think this is leaky -- we never delete err. actually + deleting it is tricky, though; it gets placed into some + soap-internal struct with uncertain extent... -- CSR, + 2007-10-01 */ char *err = new char[256]; /* FIXME: overflows */ snprintf(err, 255, "%s: %s\n%s", a, b, sysFunc ? 
strerror(errno) : ""); /* FIXME: actually we could usefully do with a properly structured @@ -19,42 +31,6 @@ } } -#define O2_AUDIODB_INITIALIZERS \ - dim(0), \ - dbName(0), \ - inFile(0), \ - key(0), \ - trackFileName(0), \ - trackFile(0), \ - command(0), \ - timesFileName(0), \ - timesFile(0), \ - dbfid(0), \ - infid(0), \ - db(0), \ - indata(0), \ - dbH(0), \ - fileTable(0), \ - trackTable(0), \ - dataBuf(0), \ - l2normTable(0), \ - qNorm(0), \ - timesTable(0), \ - verbosity(1), \ - queryType(O2_FLAG_POINT_QUERY), \ - pointNN(O2_DEFAULT_POINTNN), \ - trackNN(O2_DEFAULT_TRACKNN), \ - sequenceLength(16), \ - sequenceHop(1), \ - queryPoint(0), \ - usingQueryPoint(0), \ - usingTimes(0), \ - isClient(0), \ - isServer(0), \ - port(0), \ - timesTol(0.1), \ - radius(0) - audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS { if(processArgs(argc, argv)<0){ @@ -105,22 +81,31 @@ audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult): O2_AUDIODB_INITIALIZERS { - processArgs(argc, argv); - isServer = 1; // FIXME: Hack - assert(O2_ACTION(COM_QUERY)); - query(dbName, inFile, adbQueryResult); + try { + processArgs(argc, argv); + isServer = 1; // FIXME: Hack + assert(O2_ACTION(COM_QUERY)); + query(dbName, inFile, adbQueryResult); + } catch(char *err) { + cleanup(); + throw(err); + } } audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResult *adbStatusResult): O2_AUDIODB_INITIALIZERS { - processArgs(argc, argv); - isServer = 1; // FIXME: Hack - assert(O2_ACTION(COM_STATUS)); - status(dbName, adbStatusResult); + try { + processArgs(argc, argv); + isServer = 1; // FIXME: Hack + assert(O2_ACTION(COM_STATUS)); + status(dbName, adbStatusResult); + } catch(char *err) { + cleanup(); + throw(err); + } } -audioDB::~audioDB(){ - // Clean up +void audioDB::cleanup() { if(indata) munmap(indata,statbuf.st_size); if(db) @@ -133,6 +118,10 @@ delete dbH; } +audioDB::~audioDB(){ + cleanup(); +} + int 
audioDB::processArgs(const unsigned argc, char* const argv[]){ if(argc<2){ @@ -179,145 +168,150 @@ if(port<100 || port > 100000) error("port out of range"); isServer=1; +#if defined(O2_DEBUG) + struct sigaction sa; + sa.sa_sigaction = sigterm_action; + sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER; + sigaction(SIGTERM, &sa, NULL); + sa.sa_sigaction = sighup_action; + sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER; + sigaction(SIGHUP, &sa, NULL); +#endif return 0; } // No return on client command, find database command - if(args_info.client_given){ - command=COM_CLIENT; - hostport=args_info.client_arg; - isClient=1; - } + if(args_info.client_given){ + command=COM_CLIENT; + hostport=args_info.client_arg; + isClient=1; + } - if(args_info.NEW_given){ - command=COM_CREATE; - dbName=args_info.database_arg; - return 0; - } + if(args_info.NEW_given){ + command=COM_CREATE; + dbName=args_info.database_arg; + return 0; + } - if(args_info.STATUS_given){ - command=COM_STATUS; - dbName=args_info.database_arg; - return 0; - } + if(args_info.STATUS_given){ + command=COM_STATUS; + dbName=args_info.database_arg; + return 0; + } - if(args_info.DUMP_given){ - command=COM_DUMP; - dbName=args_info.database_arg; - return 0; - } + if(args_info.DUMP_given){ + command=COM_DUMP; + dbName=args_info.database_arg; + return 0; + } - if(args_info.L2NORM_given){ - command=COM_L2NORM; - dbName=args_info.database_arg; - return 0; - } + if(args_info.L2NORM_given){ + command=COM_L2NORM; + dbName=args_info.database_arg; + return 0; + } - if(args_info.INSERT_given){ - command=COM_INSERT; - dbName=args_info.database_arg; - inFile=args_info.features_arg; - if(args_info.key_given) - key=args_info.key_arg; - if(args_info.times_given){ - timesFileName=args_info.times_arg; - if(strlen(timesFileName)>0){ - if(!(timesFile = new ifstream(timesFileName,ios::in))) - error("Could not open times file for reading", timesFileName); - usingTimes=1; - } - } - return 0; - } - - if(args_info.BATCHINSERT_given){ - 
command=COM_BATCHINSERT; - dbName=args_info.database_arg; - inFile=args_info.featureList_arg; - if(args_info.keyList_given) - key=args_info.keyList_arg; // INCONSISTENT NO CHECK + if(args_info.INSERT_given){ + command=COM_INSERT; + dbName=args_info.database_arg; + inFile=args_info.features_arg; + if(args_info.key_given) + key=args_info.key_arg; + if(args_info.times_given){ + timesFileName=args_info.times_arg; + if(strlen(timesFileName)>0){ + if(!(timesFile = new ifstream(timesFileName,ios::in))) + error("Could not open times file for reading", timesFileName); + usingTimes=1; + } + } + return 0; + } + + if(args_info.BATCHINSERT_given){ + command=COM_BATCHINSERT; + dbName=args_info.database_arg; + inFile=args_info.featureList_arg; + if(args_info.keyList_given) + key=args_info.keyList_arg; // INCONSISTENT NO CHECK - /* TO DO: REPLACE WITH + /* TO DO: REPLACE WITH if(args_info.keyList_given){ trackFileName=args_info.keyList_arg; if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) error("Could not open keyList file for reading",trackFileName); } AND UPDATE BATCHINSERT() - */ - - if(args_info.timesList_given){ - timesFileName=args_info.timesList_arg; - if(strlen(timesFileName)>0){ - if(!(timesFile = new ifstream(timesFileName,ios::in))) - error("Could not open timesList file for reading", timesFileName); - usingTimes=1; - } - } - return 0; - } - - // Query command and arguments - if(args_info.QUERY_given){ - command=COM_QUERY; - dbName=args_info.database_arg; - inFile=args_info.features_arg; - - if(args_info.keyList_given){ - trackFileName=args_info.keyList_arg; - if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) - error("Could not open keyList file for reading",trackFileName); - } - - if(args_info.times_given){ - timesFileName=args_info.times_arg; - if(strlen(timesFileName)>0){ - if(!(timesFile = new ifstream(timesFileName,ios::in))) - error("Could not open times file for reading", timesFileName); - 
usingTimes=1; - } - } - - // query type - if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0) - queryType=O2_FLAG_TRACK_QUERY; - else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0) - queryType=O2_FLAG_POINT_QUERY; - else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0) - queryType=O2_FLAG_SEQUENCE_QUERY; - else - error("unsupported query type",args_info.QUERY_arg); - - if(!args_info.exhaustive_flag){ - queryPoint = args_info.qpoint_arg; - usingQueryPoint=1; - if(queryPoint<0 || queryPoint >10000) - error("queryPoint out of range: 0 <= queryPoint <= 10000"); - } - - - pointNN=args_info.pointnn_arg; - if(pointNN<1 || pointNN >1000) - error("pointNN out of range: 1 <= pointNN <= 1000"); - - - - trackNN=args_info.resultlength_arg; - if(trackNN<1 || trackNN >10000) - error("resultlength out of range: 1 <= resultlength <= 1000"); - - - sequenceLength=args_info.sequencelength_arg; - if(sequenceLength<1 || sequenceLength >1000) - error("seqlen out of range: 1 <= seqlen <= 1000"); - - sequenceHop=args_info.sequencehop_arg; - if(sequenceHop<1 || sequenceHop >1000) - error("seqhop out of range: 1 <= seqhop <= 1000"); - - return 0; - } - return -1; // no command found + */ + + if(args_info.timesList_given){ + timesFileName=args_info.timesList_arg; + if(strlen(timesFileName)>0){ + if(!(timesFile = new ifstream(timesFileName,ios::in))) + error("Could not open timesList file for reading", timesFileName); + usingTimes=1; + } + } + return 0; + } + + // Query command and arguments + if(args_info.QUERY_given){ + command=COM_QUERY; + dbName=args_info.database_arg; + inFile=args_info.features_arg; + + if(args_info.keyList_given){ + trackFileName=args_info.keyList_arg; + if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) + error("Could not open keyList file for reading",trackFileName); + } + + if(args_info.times_given){ + timesFileName=args_info.times_arg; + if(strlen(timesFileName)>0){ + if(!(timesFile = new ifstream(timesFileName,ios::in))) 
+ error("Could not open times file for reading", timesFileName); + usingTimes=1; + } + } + + // query type + if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0) + queryType=O2_TRACK_QUERY; + else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0) + queryType=O2_POINT_QUERY; + else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0) + queryType=O2_SEQUENCE_QUERY; + else + error("unsupported query type",args_info.QUERY_arg); + + if(!args_info.exhaustive_flag){ + queryPoint = args_info.qpoint_arg; + usingQueryPoint=1; + if(queryPoint<0 || queryPoint >10000) + error("queryPoint out of range: 0 <= queryPoint <= 10000"); + } + + pointNN = args_info.pointnn_arg; + if(pointNN < 1 || pointNN > 1000) { + error("pointNN out of range: 1 <= pointNN <= 1000"); + } + trackNN = args_info.resultlength_arg; + if(trackNN < 1 || trackNN > 1000) { + error("resultlength out of range: 1 <= resultlength <= 1000"); + } + sequenceLength = args_info.sequencelength_arg; + if(sequenceLength < 1 || sequenceLength > 1000) { + error("seqlen out of range: 1 <= seqlen <= 1000"); + } + sequenceHop = args_info.sequencehop_arg; + if(sequenceHop < 1 || sequenceHop > 1000) { + error("seqhop out of range: 1 <= seqhop <= 1000"); + } + return 0; + } + return -1; // no command found } /* Make a new database @@ -334,31 +328,31 @@ -------------------------------------------------------------------------- | key 256 bytes | -------------------------------------------------------------------------- - O2_MAXFILES*02_FILENAMELENGTH + O2_MAXFILES*O2_FILENAMELENGTH trackTable : Maps implicit feature index to a feature vector matrix -------------------------------------------------------------------------- | numVectors (4 bytes) | -------------------------------------------------------------------------- - O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(INT) + O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(INT) featureTable -------------------------------------------------------------------------- | v1 v2 v3 ... 
vd (double) | -------------------------------------------------------------------------- - O2_MAXFILES * 02_MEANNUMFEATURES * DIM * sizeof(DOUBLE) + O2_MAXFILES * O2_MEANNUMFEATURES * DIM * sizeof(DOUBLE) timesTable -------------------------------------------------------------------------- | timestamp (double) | -------------------------------------------------------------------------- - O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(DOUBLE) + O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE) l2normTable -------------------------------------------------------------------------- | nm (double) | -------------------------------------------------------------------------- - O2_MAXFILES * 02_MEANNUMFEATURES * sizeof(DOUBLE) + O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE) */ @@ -446,25 +440,27 @@ // initTables - memory map files passed as arguments // Precondition: database has already been created -void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile=0){ - if ((dbfid = open (dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) +void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) { + if ((dbfid = open(dbName, forWrite ? 
O_RDWR : O_RDONLY)) < 0) { error("Can't open database file", dbName, "open"); + } get_lock(dbfid, forWrite); // open the input file - if (inFile && (infid = open (inFile, O_RDONLY)) < 0) + if (inFile && (infid = open(inFile, O_RDONLY)) < 0) { error("can't open input file for reading", inFile, "open"); - + } // find size of input file - if (inFile && fstat (infid,&statbuf) < 0) - error("fstat error finding size of input", "", "fstat"); - + if (inFile && fstat(infid, &statbuf) < 0) { + error("fstat error finding size of input", inFile, "fstat"); + } // Get the database header info dbH = new dbTableHeaderT(); assert(dbH); - if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) - error("error reading db header"); + if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) { + error("error reading db header", dbName, "read"); + } fileTableOffset = O2_HEADERSIZE; trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; @@ -472,19 +468,20 @@ l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); - if(dbH->magic!=O2_MAGIC){ - cerr << "expected: " << O2_MAGIC << ", got:" << dbH->magic << endl; + if(dbH->magic != O2_MAGIC) { + cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << endl; error("database file has incorrect header",dbName); } if(inFile) - if(dbH->dim==0 && dbH->length==0) // empty database - read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality + if(dbH->dim == 0 && dbH->length == 0) // empty database + // initialize with input dimensionality + read(infid, &dbH->dim, sizeof(unsigned)); else { unsigned test; - read(infid,&test,sizeof(unsigned)); - if(dbH->dim!=test){ - cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <dim != test) { + cerr << "error: expected dimension: " << dbH->dim << ", got : " << test <__sizeRlist=listLen; 
adbQueryResult->__sizeDist=listLen; adbQueryResult->__sizeQpos=listLen; adbQueryResult->__sizeSpos=listLen; adbQueryResult->Rlist= new char*[listLen]; adbQueryResult->Dist = new double[listLen]; - adbQueryResult->Qpos = new int[listLen]; - adbQueryResult->Spos = new int[listLen]; + adbQueryResult->Qpos = new unsigned int[listLen]; + adbQueryResult->Spos = new unsigned int[listLen]; for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){ adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; adbQueryResult->Dist[k]=distances[k]; @@ -1372,8 +1374,8 @@ adbQueryResult->__sizeSpos=listLen; adbQueryResult->Rlist= new char*[listLen]; adbQueryResult->Dist = new double[listLen]; - adbQueryResult->Qpos = new int[listLen]; - adbQueryResult->Spos = new int[listLen]; + adbQueryResult->Qpos = new unsigned int[listLen]; + adbQueryResult->Spos = new unsigned int[listLen]; for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){ adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; adbQueryResult->Dist[k]=trackDistances[k]; @@ -1870,8 +1872,8 @@ adbQueryResult->__sizeSpos=listLen; adbQueryResult->Rlist= new char*[listLen]; adbQueryResult->Dist = new double[listLen]; - adbQueryResult->Qpos = new int[listLen]; - adbQueryResult->Spos = new int[listLen]; + adbQueryResult->Qpos = new unsigned int[listLen]; + adbQueryResult->Spos = new unsigned int[listLen]; for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){ adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; adbQueryResult->Dist[k]=trackDistances[k]; @@ -2346,8 +2348,8 @@ adbQueryResult->__sizeSpos=listLen; adbQueryResult->Rlist= new char*[listLen]; adbQueryResult->Dist = new double[listLen]; - adbQueryResult->Qpos = new int[listLen]; - adbQueryResult->Spos = new int[listLen]; + adbQueryResult->Qpos = new unsigned int[listLen]; + adbQueryResult->Spos = new unsigned int[listLen]; for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){ adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; adbQueryResult->Dist[k]=trackDistances[k]; @@ -2449,10 
+2451,15 @@ X+=dim; } unsigned offset; - if(append) - offset=dbH->length/(dbH->dim*sizeof(double)); // number of vectors - else + if(append) { + // FIXME: a hack, a very palpable hack: the vectors have already + // been inserted, and dbH->length has already been updated. We + // need to subtract off again the number of vectors that we've + // inserted this time... + offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors + } else { offset=0; + } memcpy(l2normTable+offset, l2buf, n*sizeof(double)); if(l2buf) delete[] l2buf; @@ -2467,6 +2474,13 @@ struct soap soap; int m, s; // master and slave sockets soap_init(&soap); + // FIXME: largely this use of SO_REUSEADDR is to make writing (and + // running) test cases more convenient, so that multiple test runs + // in close succession don't fail because of a bin() error. + // Investigate whether there are any potential drawbacks in this, + // and also whether there's a better way to write the tests. -- + // CSR, 2007-10-03 + soap.bind_flags |= SO_REUSEADDR; m = soap_bind(&soap, NULL, port, 100); if (m < 0) soap_print_fault(&soap, stderr); @@ -2515,11 +2529,11 @@ char queryType[256]; for(int k=0; k<256; k++) queryType[k]='\0'; - if(qType == O2_FLAG_POINT_QUERY) + if(qType == O2_POINT_QUERY) strncpy(queryType, "point", strlen("point")); - else if (qType == O2_FLAG_SEQUENCE_QUERY) + else if (qType == O2_SEQUENCE_QUERY) strncpy(queryType, "sequence", strlen("sequence")); - else if(qType == O2_FLAG_TRACK_QUERY) + else if(qType == O2_TRACK_QUERY) strncpy(queryType,"track", strlen("track")); else strncpy(queryType, "", strlen("")); @@ -2575,3 +2589,4 @@ int main(const unsigned argc, char* const argv[]){ audioDB(argc, argv); } + diff -r a1d462d592dd -r 1521d46bc1ac audioDB.h --- a/audioDB.h Mon Oct 01 14:59:03 2007 +0000 +++ b/audioDB.h Fri Oct 05 11:45:03 2007 +0000 @@ -11,6 +11,7 @@ #include #include #include +#include // includes for web services #include "soapH.h" @@ -64,11 +65,13 @@ // Flags #define 
O2_FLAG_L2NORM (0x1U) #define O2_FLAG_MINMAX (0x2U) -#define O2_FLAG_POINT_QUERY (0x4U) -#define O2_FLAG_SEQUENCE_QUERY (0x8U) -#define O2_FLAG_TRACK_QUERY (0x10U) #define O2_FLAG_TIMES (0x20U) +// Query types +#define O2_POINT_QUERY (0x4U) +#define O2_SEQUENCE_QUERY (0x8U) +#define O2_TRACK_QUERY (0x10U) + // Error Codes #define O2_ERR_KEYNOTFOUND (0xFFFFFF00) @@ -160,6 +163,7 @@ audioDB(const unsigned argc, char* const argv[]); audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult); audioDB(const unsigned argc, char* const argv[], adb__statusResult *adbStatusResult); + void cleanup(); ~audioDB(); int processArgs(const unsigned argc, char* const argv[]); void get_lock(int fd, bool exclusive); @@ -180,3 +184,38 @@ }; +#define O2_AUDIODB_INITIALIZERS \ + dim(0), \ + dbName(0), \ + inFile(0), \ + key(0), \ + trackFileName(0), \ + trackFile(0), \ + command(0), \ + timesFileName(0), \ + timesFile(0), \ + dbfid(0), \ + infid(0), \ + db(0), \ + indata(0), \ + dbH(0), \ + fileTable(0), \ + trackTable(0), \ + dataBuf(0), \ + l2normTable(0), \ + qNorm(0), \ + timesTable(0), \ + verbosity(1), \ + queryType(O2_POINT_QUERY), \ + pointNN(O2_DEFAULT_POINTNN), \ + trackNN(O2_DEFAULT_TRACKNN), \ + sequenceLength(16), \ + sequenceHop(1), \ + queryPoint(0), \ + usingQueryPoint(0), \ + usingTimes(0), \ + isClient(0), \ + isServer(0), \ + port(0), \ + timesTol(0.1), \ + radius(0) diff -r a1d462d592dd -r 1521d46bc1ac audioDBws.h --- a/audioDBws.h Mon Oct 01 14:59:03 2007 +0000 +++ b/audioDBws.h Fri Oct 05 11:45:03 2007 +0000 @@ -18,9 +18,9 @@ int __sizeDist; double *Dist; int __sizeQpos; - int *Qpos; + unsigned int *Qpos; int __sizeSpos; - int *Spos; + unsigned int *Spos; }; class adb__statusResult { diff -r a1d462d592dd -r 1521d46bc1ac debian/changelog --- a/debian/changelog Mon Oct 01 14:59:03 2007 +0000 +++ b/debian/changelog Fri Oct 05 11:45:03 2007 +0000 @@ -1,3 +1,9 @@ +audiodb (1.0-9) unstable; urgency=low + + * Updated to svn version #122 + + -- 
Christophe Rhodes Fri, 5 Oct 2007 12:39:45 +0100 + audiodb (1.0-8) unstable; urgency=low * updated to svn version #95 diff -r a1d462d592dd -r 1521d46bc1ac docs/TODO.txt --- a/docs/TODO.txt Mon Oct 01 14:59:03 2007 +0000 +++ b/docs/TODO.txt Fri Oct 05 11:45:03 2007 +0000 @@ -1,3 +1,106 @@ +* development of functionality + +** exposure of all non-write functions over Web Services + +At present, the radius / counting query type isn't supported over +SOAP. Supporting it involves changing the adb__query() exported +function, so we need to be careful. + +** matrix of possible queries + +At the moment, there are four content-based query types, each of which +does something slightly different from what you might expect from its +name. I think that the space of possible (sensible?) queries is +larger than this -- though working out the sensible abstraction might +have to wait for more use cases -- and also that the orthogonality of +various parameters is missing. (e.g. a silence threshold should be +applied to all queries or none, if it makes sense at all.) + +Additionally, query by key (filename) might be important. + +** results + +Need to sort out what the results mean; is it a similarity or a +distance score, etc. Also, is it possible to support NN queries in a +non-Euclidean space? + +** SOAP / URIs + +At the moment, the query and database are referred to by paths naming +files on the SOAP server's filesystem. This makes a limited amount of +sense for the database (though exposing implementation details of +ISMS's file system is not a great idea) but makes no sense at all for +the query. So we need to define a query data structure that can be +serialised (preferably automatically) by SOAP for use in queries. + +If we ever support inserting or other write functionality over SOAP, +this will need doing for feature files (the same as queries) and for +key lists too. 
+ +** Memory management tricks + +We have a friendly memory access pattern (at least on Unixoids; +Win32's API isn't a great match for mmap(), so it is significantly +slower there). Investigate whether madvise() tricks improve +performance on any OSes. Also, maybe investigate a specialized use of +GetViewOfFile on win32 to make it tolerable on that platform. + +** LSH + +Integrate the LSH indexing with the database. Can it be done as a +separate index file, created on demand? What are we trying to +optimize our on-disk format for, and can it be better optimized by +having multiple files? + +** RDF (not necessarily related to audioDB) + +Export the results of our experiments (kept in an SQL database) as +RDF, so that people can infer stuff if they know enough about our +methods. + +Possibly also write an export routine for exporting an audioDB as RDF. +And laugh hollowly as XML parsers fail completely to ingest such a +monstrous file. + +* architectural issues + +** API vs command-line + +While having a command line interface is nice, having the only way to +initialize a new audioDB instance being by faking up enough of a +command line to call our wacky constructors is less nice. +Furthermore, having the "business logic" run by the constructor is +also a little bit weird. + +* regression (and other) tests + +** Command line interface + +There is now broad coverage of the audioDB logic, with the major +exceptions of the batch insert command, and the specifying of +different keys on import. + +** SOAP + +The shell's support for wait() and equivalents is limited, so there +are "sleep 1"s dotted around to attempt to avoid race conditions. +Find a better way. Similarly, using SO_REUSEADDR in bind() is a hack +that ought not to be necessary just to run the same test twice... + +** Locking + +The fcntl() locking should be good enough for our uses. 
Investigate +whether it is in fact robust enough (including that EAGAIN workaround +for OS X; read the kernel source to find out where that's coming from +and report it if possible). + +** Benchmarks + +Get together a realistic set of usage cases, preferably testing each +of the query types, and benchmark them automatically. This is +basically a prerequisite of any performance work. + +* Michael's old TODO list audioDB FIXME: diff -r a1d462d592dd -r 1521d46bc1ac tests/0011/run-test.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0011/run-test.sh Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,40 @@ +#! /bin/sh + +. ../test-utils.sh + +if [ -f testdb ]; then rm -f testdb; fi + +${AUDIODB} -d testdb -N + +intstring 2 > testfeature +floatstring 0 0.5 >> testfeature +floatstring 0.5 0 >> testfeature + +${AUDIODB} -d testdb -I -f testfeature + +# sequence queries require L2NORM +${AUDIODB} -d testdb -L + +echo "query point (0.0,0.5)" +intstring 2 > testquery +floatstring 0 0.5 >> testquery + +${AUDIODB} -d testdb -Q sequence -l 1 -f testquery > testoutput +echo testfeature 1 0 0 > test-expected-output +cmp testoutput test-expected-output +${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -n 1 > testoutput +echo testfeature 0 0 0 > test-expected-output +cmp testoutput test-expected-output + +echo "query point (0.5,0.0)" +intstring 2 > testquery +floatstring 0.5 0 >> testquery + +${AUDIODB} -d testdb -Q sequence -l 1 -f testquery > testoutput +echo testfeature 1 0 1 > test-expected-output +cmp testoutput test-expected-output +${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -n 1 > testoutput +echo testfeature 0 0 1 > test-expected-output +cmp testoutput test-expected-output + +exit 104 diff -r a1d462d592dd -r 1521d46bc1ac tests/0011/short-description --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0011/short-description Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,1 @@ +sequence search / 1 track / non-normed features diff -r a1d462d592dd -r 1521d46bc1ac 
tests/0012/run-test.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0012/run-test.sh Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,41 @@ +#! /bin/sh + +. ../test-utils.sh + +if [ -f testdb ]; then rm -f testdb; fi + +${AUDIODB} -d testdb -N + +intstring 2 > testfeature +floatstring 0 0.5 >> testfeature +floatstring 0.5 0 >> testfeature + +# sequence queries require L2NORM; check that we can still insert +# after turning flag on +${AUDIODB} -d testdb -L + +${AUDIODB} -d testdb -I -f testfeature + +echo "query point (0.0,0.5)" +intstring 2 > testquery +floatstring 0 0.5 >> testquery + +${AUDIODB} -d testdb -Q sequence -l 1 -f testquery > testoutput +echo testfeature 1 0 0 > test-expected-output +cmp testoutput test-expected-output +${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -n 1 > testoutput +echo testfeature 0 0 0 > test-expected-output +cmp testoutput test-expected-output + +echo "query point (0.5,0.0)" +intstring 2 > testquery +floatstring 0.5 0 >> testquery + +${AUDIODB} -d testdb -Q sequence -l 1 -f testquery > testoutput +echo testfeature 1 0 1 > test-expected-output +cmp testoutput test-expected-output +${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -n 1 > testoutput +echo testfeature 0 0 1 > test-expected-output +cmp testoutput test-expected-output + +exit 104 diff -r a1d462d592dd -r 1521d46bc1ac tests/0012/short-description --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0012/short-description Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,1 @@ +insert after L2Norm \ No newline at end of file diff -r a1d462d592dd -r 1521d46bc1ac tests/0013/run-test.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0013/run-test.sh Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,11 @@ +#! /bin/bash + +. ../test-utils.sh + +start_server ${AUDIODB} 10013 + +check_server $! + +stop_server $! 
+ +exit 104 diff -r a1d462d592dd -r 1521d46bc1ac tests/0013/short-description --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0013/short-description Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,1 @@ +WS server startup / shutdown diff -r a1d462d592dd -r 1521d46bc1ac tests/0014/run-test.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0014/run-test.sh Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,45 @@ +#! /bin/sh + +. ../test-utils.sh + +if [ -f testdb ]; then rm -f testdb; fi + +${AUDIODB} -d testdb -N + +intstring 2 > testfeature +floatstring 0 1 >> testfeature +floatstring 1 0 >> testfeature + +${AUDIODB} -d testdb -I -f testfeature + +echo "query point (0.0,0.5)" +intstring 2 > testquery +floatstring 0 0.5 >> testquery + +start_server ${AUDIODB} 10014 + +${AUDIODB} -c localhost:10014 -d testdb -Q point -f testquery > testoutput +echo testfeature 0.5 0 0 > test-expected-output +echo testfeature 0 0 1 >> test-expected-output +cmp testoutput test-expected-output +${AUDIODB} -c localhost:10014 -d testdb -Q point -f testquery -n 1 > testoutput +echo testfeature 0.5 0 0 > test-expected-output +cmp testoutput test-expected-output + +check_server $! + +echo "query point (0.5,0.0)" +intstring 2 > testquery +floatstring 0.5 0 >> testquery + +${AUDIODB} -c localhost:10014 -d testdb -Q point -f testquery > testoutput +echo testfeature 0.5 0 1 > test-expected-output +echo testfeature 0 0 0 >> test-expected-output +cmp testoutput test-expected-output +${AUDIODB} -c localhost:10014 -d testdb -Q point -f testquery -n 1 > testoutput +echo testfeature 0.5 0 1 > test-expected-output +cmp testoutput test-expected-output + +stop_server $! 
+ +exit 104 diff -r a1d462d592dd -r 1521d46bc1ac tests/0014/short-description --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0014/short-description Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,1 @@ +WS version of 0004 \ No newline at end of file diff -r a1d462d592dd -r 1521d46bc1ac tests/0015/run-test.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0015/run-test.sh Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,37 @@ +#! /bin/bash + +. ../test-utils.sh + +if [ -f testdb ]; then rm -f testdb; fi + +start_server ${AUDIODB} 10015 + +${AUDIODB} -d testdb -N + +${AUDIODB} -c localhost:10015 -d testdb -S > test1 +${AUDIODB} -S -c localhost:10015 -d testdb > test2 +${AUDIODB} -S -d testdb -c localhost:10015 > test3 + +cat > testoutput < testfeature +floatstring 0 1 >> testfeature +floatstring 1 0 >> testfeature + +${AUDIODB} -d testdb -I -f testfeature + +# sequence queries require L2NORM +${AUDIODB} -d testdb -L + +echo "query point (0.0,0.5)" +intstring 2 > testquery +floatstring 0 0.5 >> testquery + +start_server ${AUDIODB} 10016 + +${AUDIODB} -c localhost:10016 -d testdb -Q sequence -l 1 -f testquery > testoutput +echo testfeature 1 0 0 > test-expected-output +cmp testoutput test-expected-output +${AUDIODB} -c localhost:10016 -d testdb -Q sequence -l 1 -f testquery -n 1 > testoutput +echo testfeature 0 0 0 > test-expected-output +cmp testoutput test-expected-output + +check_server $! + +echo "query point (0.5,0.0)" +intstring 2 > testquery +floatstring 0.5 0 >> testquery + +${AUDIODB} -c localhost:10016 -d testdb -Q sequence -l 1 -f testquery > testoutput +echo testfeature 1 0 1 > test-expected-output +cmp testoutput test-expected-output +${AUDIODB} -c localhost:10016 -d testdb -Q sequence -l 1 -f testquery -n 1 > testoutput +echo testfeature 0 0 1 > test-expected-output +cmp testoutput test-expected-output + +stop_server $! 
+ +exit 104 diff -r a1d462d592dd -r 1521d46bc1ac tests/0016/short-description --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0016/short-description Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,1 @@ +WS version of 0006 \ No newline at end of file diff -r a1d462d592dd -r 1521d46bc1ac tests/0017/run-test.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0017/run-test.sh Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,55 @@ +#! /bin/sh + +. ../test-utils.sh + +if [ -f testdb ]; then rm -f testdb; fi + +${AUDIODB} -d testdb -N + +# tests that the lack of -l when the query sequence is shorter doesn't +# segfault. + +intstring 2 > testfeature +floatstring 0 1 >> testfeature +floatstring 1 0 >> testfeature + +${AUDIODB} -d testdb -I -f testfeature + +# sequence queries require L2NORM +${AUDIODB} -d testdb -L + +start_server ${AUDIODB} 10017 + +echo "query point (0.0,0.5)" +intstring 2 > testquery +floatstring 0 0.5 >> testquery + +# FIXME: this actually revealed a horrible failure mode of the server: +# since we were throwing exceptions from the constructor, the +# destructor wasn't getting called and so we were retaining 2Gb of +# address space, leading to immediate out of memory errors for the +# /second/ call. We fix that by being a bit more careful about our +# exception handling and cleanup discipline, but how to test...? + +expect_client_failure ${AUDIODB} -c localhost:10017 -d testdb -Q sequence -f testquery +expect_client_failure ${AUDIODB} -c localhost:10017 -d testdb -Q sequence -f testquery -n 1 + +check_server $! + +echo "query point (0.5,0.0)" +intstring 2 > testquery +floatstring 0.5 0 >> testquery + +expect_client_failure ${AUDIODB} -c localhost:10017 -d testdb -Q sequence -f testquery +expect_client_failure ${AUDIODB} -c localhost:10017 -d testdb -Q sequence -f testquery -n 1 + +check_server $! 
+ +# see if the server can actually produce any output at this point +${AUDIODB} -c localhost:10017 -d testdb -Q sequence -l 1 -f testquery -n 1 > testoutput +echo testfeature 0 0 1 > test-expected-output +cmp testoutput test-expected-output + +stop_server $! + +exit 104 diff -r a1d462d592dd -r 1521d46bc1ac tests/0017/short-description --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0017/short-description Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,1 @@ +WS version of 0007 / destructor cleanup \ No newline at end of file diff -r a1d462d592dd -r 1521d46bc1ac tests/0018/run-test.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0018/run-test.sh Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,51 @@ +#! /bin/bash +# bash, not sh: test-utils.sh uses 'set -E' and 'trap ... ERR', which POSIX sh lacks. + +. ../test-utils.sh + +if [ -f testdb ]; then rm -f testdb; fi + +${AUDIODB} -d testdb -N + +intstring 2 > testfeature01 +floatstring 0 1 >> testfeature01 +intstring 2 > testfeature10 +floatstring 1 0 >> testfeature10 + +${AUDIODB} -d testdb -I -f testfeature01 +${AUDIODB} -d testdb -I -f testfeature10 + +# sequence queries require L2NORM +${AUDIODB} -d testdb -L + +start_server ${AUDIODB} 10018 + +echo "query point (0.0,0.5)" +intstring 2 > testquery +floatstring 0 0.5 >> testquery + +${AUDIODB} -c localhost:10018 -d testdb -Q sequence -l 1 -f testquery > testoutput +echo testfeature01 0 0 0 > test-expected-output +echo testfeature10 2 0 0 >> test-expected-output +cmp testoutput test-expected-output +${AUDIODB} -c localhost:10018 -d testdb -Q sequence -l 1 -f testquery -r 1 > testoutput +echo testfeature01 0 0 0 > test-expected-output +cmp testoutput test-expected-output + +check_server $! 
+ +echo "query point (0.5,0.0)" +intstring 2 > testquery +floatstring 0.5 0 >> testquery + +${AUDIODB} -c localhost:10018 -d testdb -Q sequence -l 1 -f testquery > testoutput +echo testfeature10 0 0 0 > test-expected-output +echo testfeature01 2 0 0 >> test-expected-output +cmp testoutput test-expected-output +${AUDIODB} -c localhost:10018 -d testdb -Q sequence -l 1 -f testquery -r 1 > testoutput +echo testfeature10 0 0 0 > test-expected-output +cmp testoutput test-expected-output + +stop_server $! + +exit 104 diff -r a1d462d592dd -r 1521d46bc1ac tests/0018/short-description --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0018/short-description Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,1 @@ +WS version of 0008 \ No newline at end of file diff -r a1d462d592dd -r 1521d46bc1ac tests/0019/run-test.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0019/run-test.sh Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,48 @@ +#! /bin/bash +# bash, not sh: test-utils.sh uses 'set -E' and 'trap ... ERR', which POSIX sh lacks. + +. ../test-utils.sh + +if [ -f testdb ]; then rm -f testdb; fi + +${AUDIODB} -d testdb -N + +intstring 2 > testfeature01 +floatstring 0 1 >> testfeature01 +intstring 2 > testfeature10 +floatstring 1 0 >> testfeature10 + +${AUDIODB} -d testdb -I -f testfeature01 +${AUDIODB} -d testdb -I -f testfeature10 + +start_server ${AUDIODB} 10019 + +echo "query point (0.0,0.5)" +intstring 2 > testquery +floatstring 0 0.5 >> testquery + +${AUDIODB} -c localhost:10019 -d testdb -Q track -l 1 -f testquery > testoutput +echo testfeature01 0.5 0 0 > test-expected-output +echo testfeature10 0 0 0 >> test-expected-output +cmp testoutput test-expected-output +${AUDIODB} -c localhost:10019 -d testdb -Q track -l 1 -f testquery -r 1 > testoutput +echo testfeature01 0.5 0 0 > test-expected-output +cmp testoutput test-expected-output + +check_server $! 
+ +echo "query point (0.5,0.0)" +intstring 2 > testquery +floatstring 0.5 0 >> testquery + +${AUDIODB} -c localhost:10019 -d testdb -Q track -l 1 -f testquery > testoutput +echo testfeature10 0.5 0 0 > test-expected-output +echo testfeature01 0 0 0 >> test-expected-output +cmp testoutput test-expected-output +${AUDIODB} -c localhost:10019 -d testdb -Q track -l 1 -f testquery -r 1 > testoutput +echo testfeature10 0.5 0 0 > test-expected-output +cmp testoutput test-expected-output + +stop_server $! + +exit 104 diff -r a1d462d592dd -r 1521d46bc1ac tests/0019/short-description --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0019/short-description Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,1 @@ +WS version of 0009 \ No newline at end of file diff -r a1d462d592dd -r 1521d46bc1ac tests/0020/run-test.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0020/run-test.sh Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,56 @@ +#! /bin/bash +# bash, not sh: test-utils.sh uses 'set -E' and 'trap ... ERR', which POSIX sh lacks. + +. ../test-utils.sh + +if [ -f testdb ]; then rm -f testdb; fi + +${AUDIODB} -d testdb -N + +intstring 2 > testfeature01 +floatstring 0 1 >> testfeature01 +intstring 2 > testfeature10 +floatstring 1 0 >> testfeature10 + +${AUDIODB} -d testdb -I -f testfeature01 +${AUDIODB} -d testdb -I -f testfeature10 + +# sequence queries require L2NORM +${AUDIODB} -d testdb -L + +start_server ${AUDIODB} 10020 + +echo "query point (0.0,0.5)" +intstring 2 > testquery +floatstring 0 0.5 >> testquery + +${AUDIODB} -c localhost:10020 -d testdb -Q sequence -l 1 -f testquery -R 5 > testoutput +echo testfeature01 1 > test-expected-output +echo testfeature10 1 >> test-expected-output +cmp testoutput test-expected-output +${AUDIODB} -c localhost:10020 -d testdb -Q sequence -l 1 -f testquery -r 1 -R 5 > testoutput +echo testfeature01 1 > test-expected-output +cmp testoutput test-expected-output + +check_server $! 
+ +echo "query point (0.5,0.0)" +intstring 2 > testquery +floatstring 0.5 0 >> testquery + +# FIXME: because there's only one point in each track (and the query), +# the ordering is essentially database order. We need these test +# cases anyway because we need to test non-segfaulting, non-empty +# results... + +${AUDIODB} -c localhost:10020 -d testdb -Q sequence -l 1 -f testquery -R 5 > testoutput +echo testfeature01 1 > test-expected-output +echo testfeature10 1 >> test-expected-output +cmp testoutput test-expected-output +${AUDIODB} -c localhost:10020 -d testdb -Q sequence -l 1 -f testquery -r 1 -R 5 > testoutput +echo testfeature01 1 > test-expected-output +cmp testoutput test-expected-output + +stop_server $! + +exit 104 diff -r a1d462d592dd -r 1521d46bc1ac tests/0020/short-description --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/0020/short-description Fri Oct 05 11:45:03 2007 +0000 @@ -0,0 +1,1 @@ +WS version of 0010 \ No newline at end of file diff -r a1d462d592dd -r 1521d46bc1ac tests/test-utils.sh --- a/tests/test-utils.sh Mon Oct 01 14:59:03 2007 +0000 +++ b/tests/test-utils.sh Fri Oct 05 11:45:03 2007 +0000 @@ -1,5 +1,7 @@ # no shebang line: this file should be sourced by run-test.sh files +set -E + trap "exit 1" ERR if [ -z ${AUDIODB} ]; then @@ -47,3 +49,33 @@ if [ $1 -ge 10 ]; then echo "intstring() arg too large: ${1}"; exit 1; fi printf "%b\x00\x00\x00" "\\x${1}" } + +# Web services utilities +# start_server BINARY PORT: run "BINARY -s PORT" in the background, then install an ERR trap that kills $! and exits 1. +start_server() { + "$1" -s "$2" & + # HACK: deal with race on process creation + sleep 1 + trap 'kill $!; exit 1' ERR +} + +# stop_server PID: check /proc/PID/cmdline matches ${AUDIODB}, kill PID, then rerun that grep under expect_clean_error_exit. +stop_server() { + grep "${AUDIODB}" "/proc/$1/cmdline" > /dev/null + kill "$1" + # HACK: deal with race on process exit + sleep 1 + expect_clean_error_exit grep "${AUDIODB}" "/proc/$1/cmdline" +} + +# check_server PID: succeed only if /proc/PID/cmdline still matches ${AUDIODB}. +check_server() { + grep "${AUDIODB}" "/proc/$1/cmdline" > /dev/null +} + +# expect_client_failure CMD...: run CMD exactly as given; no failure is asserted yet (see FIXME below). +expect_client_failure() { + # FIXME: work out whether and how the client should report server + # errors. At present, the client exits with a zero exit code. + "$@" +}