mas01cr@0: /* audioDB.h mas01cr@0: mas01cr@0: audioDB version 1.0 mas01cr@0: mas01cr@0: An efficient feature-vector database management system (FVDBMS) for mas01cr@0: content-based multimedia search and retrieval. mas01cr@0: mas01cr@0: Usage: audioDB [OPTIONS]... mas01cr@0: mas01cr@0: --full-help Print help, including hidden options, and exit mas01cr@0: -V, --version Print version and exit mas01cr@0: -H, --help print help on audioDB usage and exit. mas01cr@0: mas01cr@0: Database Setup: mas01cr@0: These commands require a database argument. mas01cr@0: -d, --database=filename database name to be used with database commands mas01cr@0: -N, --new make a new database mas01cr@0: -S, --status database information mas01mc@18: -D, --dump list all tracks: index key size mas01cr@0: mas01cr@0: Database Insertion: mas01cr@0: The following commands process a binary input feature file and optional mas01cr@0: associated key. mas01cr@0: -I, --insert add feature vectors to an existing database mas01cr@0: -f, --features=filename binary series of vectors file mas01cr@0: -t, --times=filename list of time points (ascii) for feature vectors mas01cr@0: -k, --key=identifier unique identifier associated with features mas01cr@0: mas01cr@0: Batch Commands: mas01cr@0: These batch commands require a list of feature vector filenames in a text mas01cr@0: file and optional list of keys in a text file. 
mas01cr@0: -B, --batchinsert add feature vectors named in a featureList file mas01cr@0: (with optional keys in a keyList file) to the mas01cr@0: named database mas01cr@0: -F, --featureList=filename text file containing list of binary feature mas01cr@0: vector files to process mas01cr@0: -T, --timesList=filename text file containing list of ascii time-point mas01cr@0: files for each feature vector file named in mas01cr@0: featureList mas01cr@0: -K, --keyList=filename text file containing list of unique identifiers mas01cr@0: to associate with list of feature files mas01cr@0: mas01cr@0: Database Search: mas01cr@0: These commands control the behaviour of retrieval from a named database. mas01cr@0: -Q, --query perform a content-based search on the named mas01cr@0: database using the named feature vector file mas01cr@0: as a query mas01cr@0: -q, --qtype=type the type of search (possible values="point", mas01mc@18: "track", "sequence" default=`sequence') mas01cr@0: -p, --qpoint=position ordinal position of query vector (or start of mas01cr@0: sequence) in feature vector input file mas01cr@0: (default=`0') mas01cr@0: -n, --pointnn=numpoints number of point nearest neighbours to use [per mas01mc@18: track in track and sequence mode] mas01cr@0: (default=`10') mas01cr@0: -r, --resultlength=length maximum length of the result list mas01cr@0: (default=`10') mas01cr@0: -l, --sequencelength=length length of sequences for sequence search mas01cr@0: (default=`16') mas01cr@0: -h, --sequencehop=hop hop size of sequence window for sequence search mas01cr@0: (default=`1') mas01cr@0: mas01cr@0: Web Services: mas01cr@0: These commands enable the database process to establish a connection via the mas01cr@0: internet and operate as separate client and server processes. 
mas01cr@0: -s, --server=port run as standalone web service on named port mas01cr@0: (default=`80011') [NOTE(review): 80011 is above 65535, the largest valid TCP port number - confirm the real default] mas01cr@0: -c, --client=hostname:port run as a client using named host service mas01cr@0: mas01cr@0: */ mas01cr@0: mas01cr@0: mas01cr@0: /* NOTE(review): the header names after the following #include directives are missing from this copy - restore them before compiling */ #include mas01cr@0: #include mas01cr@0: #include mas01cr@0: #include mas01cr@0: #include mas01cr@0: #include mas01cr@0: #include mas01cr@0: #include mas01cr@0: #include mas01cr@0: #include mas01cr@0: #include mas01cr@0: #include mas01cr@0: mas01cr@0: // includes for web services mas01cr@0: #include "soapH.h" mas01cr@0: #include "adb.nsmap" mas01cr@0: #include "cmdline.h" mas01cr@0: mas01cr@0: #define MAXSTR 512 mas01cr@0: mas01cr@0: // Database PRIMARY commands mas01cr@0: #define COM_CREATE "--NEW" mas01cr@0: #define COM_INSERT "--INSERT" mas01cr@0: #define COM_BATCHINSERT "--BATCHINSERT" mas01cr@0: #define COM_QUERY "--QUERY" mas01cr@0: #define COM_STATUS "--STATUS" mas01cr@0: #define COM_L2NORM "--L2NORM" mas01cr@0: #define COM_DUMP "--DUMP" mas01cr@0: #define COM_SERVER "--SERVER" mas01cr@0: mas01cr@0: // parameters mas01cr@0: #define COM_CLIENT "--client" mas01cr@0: #define COM_DATABASE "--database" mas01cr@0: #define COM_QTYPE "--qtype" mas01cr@0: #define COM_SEQLEN "--sequencelength" mas01cr@0: #define COM_SEQHOP "--sequencehop" mas01cr@0: #define COM_POINTNN "--pointnn" mas01mc@18: #define COM_TRACKNN "--resultlength" mas01cr@0: #define COM_QPOINT "--qpoint" mas01cr@0: #define COM_FEATURES "--features" mas01cr@0: #define COM_QUERYKEY "--key" mas01cr@0: #define COM_KEYLIST "--keyList" mas01cr@0: #define COM_TIMES "--times" mas01cr@0: mas01cr@0: #define O2_MAGIC 1111765583 // 'B'<<24|'D'<<16|'2'<<8|'O' reads O2DB in little endian order mas01cr@0: mas01cr@0: #define O2_DEFAULT_POINTNN (10U) mas01mc@18: #define O2_DEFAULT_TRACKNN (10U) mas01cr@0: mas01mc@7: #define O2_DEFAULTDBSIZE (2000000000) // 2GB table size mas01mc@7: //#define O2_DEFAULTDBSIZE (1000000000U) // 1GB table size mas01cr@0: mas01cr@0: //#define O2_MAXFILES 
(1000000) mas01cr@0: #define O2_MAXFILES (10000U) // 10,000 files mas01cr@0: #define O2_MAXFILESTR (256U) mas01cr@0: #define O2_FILETABLESIZE (O2_MAXFILESTR) mas01mc@18: #define O2_TRACKTABLESIZE (sizeof(unsigned)) mas01cr@0: #define O2_HEADERSIZE (sizeof(dbTableHeaderT)) mas01cr@0: #define O2_MEANNUMVECTORS (1000U) mas01cr@0: #define O2_MAXDIM (1000U) mas01mc@17: #define O2_MAXNN (10000U) mas01cr@0: mas01cr@0: // Flags mas01cr@0: #define O2_FLAG_L2NORM (0x1U) mas01cr@0: #define O2_FLAG_MINMAX (0x2U) mas01cr@0: #define O2_FLAG_POINT_QUERY (0x4U) mas01cr@0: #define O2_FLAG_SEQUENCE_QUERY (0x8U) mas01mc@18: #define O2_FLAG_TRACK_QUERY (0x10U) mas01cr@0: #define O2_FLAG_TIMES (0x20U) mas01cr@0: mas01cr@0: // Error Codes mas01cr@0: #define O2_ERR_KEYNOTFOUND (0xFFFFFF00) mas01cr@0: mas01cr@0: // Macros mas01cr@0: #define O2_ACTION(a) (strcmp(command,a)==0) mas01cr@0: mas01cr@0: using namespace std; mas01cr@0: mas01cr@0: // 64 byte header (NOTE(review): only five unsigned fields are declared below, so confirm the stated size) mas01cr@0: typedef struct dbTableHeader{ mas01cr@0: unsigned magic; mas01cr@0: unsigned numFiles; mas01cr@0: unsigned dim; mas01cr@0: unsigned length; mas01cr@0: unsigned flags; mas01cr@0: } dbTableHeaderT, *dbTableHeaderPtr; mas01cr@0: mas01cr@0: /* audioDB: declares the database commands as public methods (create, drop, insert, batchinsert, query, status, l2norm, dump) plus the web-service entry points (ws_status, ws_query, startServer); the private section holds the gengetopt_args_info options, file names and descriptors, table offsets and pointers, query parameters and the query/normalisation helpers. Only declarations appear in this header. */ mas01cr@0: class audioDB{ mas01cr@0: mas01cr@0: private: mas01cr@0: gengetopt_args_info args_info; mas01cr@0: unsigned dim; mas01cr@0: const char *dbName; mas01cr@0: const char *inFile; mas01cr@0: const char *hostport; mas01cr@0: const char *key; mas01mc@18: const char* trackFileName; mas01mc@18: ifstream *trackFile; mas01cr@0: const char *command; mas01cr@0: const char *timesFileName; mas01cr@0: ifstream *timesFile; mas01cr@0: mas01cr@0: int dbfid; mas01cr@0: int infid; mas01cr@0: char* db; mas01cr@0: char* indata; mas01cr@0: struct stat statbuf; mas01cr@0: dbTableHeaderPtr dbH; mas01cr@0: size_t fileTableOffset; mas01mc@18: size_t trackTableOffset; mas01cr@0: size_t dataoffset; mas01cr@0: size_t l2normTableOffset; mas01cr@0: size_t timesTableOffset; mas01cr@0: mas01cr@0: char *fileTable; 
mas01mc@18: unsigned* trackTable; mas01cr@0: double* dataBuf; mas01cr@0: double* inBuf; mas01cr@0: double* l2normTable; mas01cr@0: double* qNorm; mas01cr@0: double* sNorm; mas01cr@0: double* timesTable; mas01cr@0: mas01cr@0: // Flags and parameters mas01cr@0: unsigned verbosity; // how much do we want to know? mas01cr@0: unsigned queryType; // point queries default mas01cr@0: unsigned pointNN; // how many point NNs ? mas01mc@18: unsigned trackNN; // how many track NNs ? mas01cr@0: unsigned sequenceLength; mas01cr@0: unsigned sequenceHop; mas01cr@0: unsigned queryPoint; mas01cr@0: unsigned usingQueryPoint; mas01cr@0: unsigned usingTimes; mas01cr@0: unsigned isClient; mas01cr@0: unsigned isServer; mas01cr@0: unsigned port; mas01cr@0: double timesTol; mas01mc@17: double radius; mas01mc@17: mas01cr@0: // Timers mas01cr@0: struct timeval tv1; mas01cr@0: struct timeval tv2; mas01cr@0: mas01cr@0: // private methods mas01cr@0: void error(const char* a, const char* b = ""); mas01cr@0: void pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); mas01mc@18: void trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); mas01mc@20: void trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); mas01mc@20: void trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); mas01cr@0: mas01cr@27: void initTables(const char* dbName, bool forWrite, const char* inFile); mas01cr@0: void unitNorm(double* X, unsigned d, unsigned n, double* qNorm); mas01cr@0: void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append); mas01cr@0: void normalize(double* X, int dim, int n); mas01cr@0: void normalize(double* X, int dim, int n, double minval, double maxval); mas01cr@0: void insertTimeStamps(unsigned n, ifstream* timesFile, double* timesdata); mas01cr@0: unsigned getKeyPos(char* key); mas01cr@0: public: mas01cr@0: mas01cr@0: 
audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult=0); mas01cr@0: ~audioDB(); mas01cr@0: int processArgs(const unsigned argc, char* const argv[]); mas01cr@0: void create(const char* dbName); mas01cr@0: void drop(); mas01cr@0: void insert(const char* dbName, const char* inFile); mas01cr@0: void batchinsert(const char* dbName, const char* inFile); mas01cr@0: void query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult=0); mas01cr@0: void status(const char* dbName); mas01cr@0: void ws_status(const char*dbName, char* hostport); mas01mc@18: void ws_query(const char*dbName, const char *trackKey, const char* hostport); mas01cr@0: void l2norm(const char* dbName); mas01cr@0: void dump(const char* dbName); mas01cr@0: mas01cr@0: // web services mas01cr@0: void startServer(); mas01cr@0: mas01cr@0: }; mas01mc@17: